FusedView.java example

Explorer
database-master
/*

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/
/*
 * Created on Feb 11, 2008
 */

package com.bigdata.btree.view;

import java.lang.ref.WeakReference;
import java.util.Arrays;
import java.util.Iterator;

import org.apache.log4j.Logger;

import com.bigdata.btree.AbstractBTree;
import com.bigdata.btree.BTree;
import com.bigdata.btree.IAutoboxBTree;
import com.bigdata.btree.IBloomFilter;
import com.bigdata.btree.ICounter;
import com.bigdata.btree.IIndex;
import com.bigdata.btree.ILinearList;
import com.bigdata.btree.ILocalBTreeView;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleCursor;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.ITupleSerializer;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.IndexSegment;
import com.bigdata.btree.IndexSegmentStore;
import com.bigdata.btree.ReadOnlyIndex;
import com.bigdata.btree.Tuple;
import com.bigdata.btree.filter.Reverserator;
import com.bigdata.btree.filter.TupleRemover;
import com.bigdata.btree.filter.WrappedTupleIterator;
import com.bigdata.btree.isolation.IsolatedFusedView;
import com.bigdata.btree.proc.AbstractKeyArrayIndexProcedureConstructor;
import com.bigdata.btree.proc.IKeyRangeIndexProcedure;
import com.bigdata.btree.proc.IResultHandler;
import com.bigdata.btree.proc.ISimpleIndexProcedure;
import com.bigdata.counters.CounterSet;
import com.bigdata.mdi.IResourceMetadata;
import com.bigdata.mdi.LocalPartitionMetadata;
import com.bigdata.relation.accesspath.AccessPath;
import com.bigdata.service.MetadataService;
import com.bigdata.service.Split;

import cutthecrap.utils.striterators.IFilter;

/**
 * <p>
 * A fused view providing read-write operations on multiple B+-Trees mapping
 * variable length unsigned byte[] keys to arbitrary values. The sources MUST
 * support deletion markers. The order of the sources MUST correspond to the
 * recency of their data. Writes will be directed to the first source in the
 * sequence (the most recent source). Deletion markers are used to prevent a
 * miss on a key for a source from reading through to an older source. If a
 * deletion marker is encountered the index entry will be understood as "not
 * found" in the fused view rather than reading through to an older source where
 * it might still have a binding.
 * </p>
 * 
 * @todo consider implementing {@link IAutoboxBTree} here and collapsing
 *       {@link ILocalBTreeView} and {@link IAutoboxBTree}.
 * 
 * @todo Can I implement {@link ILinearList} here? That would make it possible
 *       to use keyAt() and indexOf() and might pave the way for a
 *       {@link MetadataService} that supports overflow since the index segments
 *       could be transparent at that point.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public class FusedView implements IIndex, ILocalBTreeView {//, IValueAge {

    /**
     * Logger for this class.
     */
    protected static final Logger log = Logger.getLogger(FusedView.class);

    /**
     * Message used when a range count can not be represented as a
     * <code>long</code>. It is logged as a warning by the fast range count
     * (which then reports {@link Long#MAX_VALUE}) and it is the message of the
     * {@link RuntimeException} thrown by the exact range counts.
     */
    static protected transient final String ERR_RANGE_COUNT_EXCEEDS_MAX_LONG = "The range count can not be expressed as a 64-bit signed integer";

    /**
     * Abstraction over the ordered sources which back the view. Iterating
     * over an instance visits the sources in order.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    private interface ISources extends Iterable<AbstractBTree> {

        /**
         * Visits the sources in order.
         */
        @Override
        Iterator<AbstractBTree> iterator();

        /**
         * The #of sources in the view.
         */
        int getSourceCount();

        /**
         * The mutable {@link BTree} for the view.
         */
        BTree getMutableBTree();

        /**
         * The sources, in order, as an array which is a copy (and hence may
         * be modified by the caller without changing the view).
         */
        AbstractBTree[] getSources();

    }
    
    /**
     * {@link ISources} implementation which keeps a hard reference to every
     * source by retaining a private copy of the {@link AbstractBTree}[] given
     * to the constructor.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    private static class HardRefSources implements ISources {

        /**
         * The mutable {@link BTree}, which is the element at index zero of
         * the sources specified to the ctor.
         */
        private final BTree btree;

        /**
         * Private copy of the btrees that are the sources for the view.
         * 
         * FIXME Change this to assemble the AbstractBTree[] dynamically from the
         * {@link #btree} hard reference and hard references to the
         * {@link IndexSegmentStore} using
         * {@link IndexSegmentStore#loadIndexSegment()}. We could actually use hard
         * references for the index segments inside of a {@link WeakReference} to an
         * array of those references.
         */
        private final AbstractBTree[] srcs;

        /**
         * @param a
         *            The ordered sources. They are validated using
         *            {@link FusedView#checkSources(AbstractBTree[])} and the
         *            array is copied.
         */
        public HardRefSources(final AbstractBTree[] a) {

            checkSources(a);

            btree = (BTree) a[0];

            srcs = a.clone();

        }

        @Override
        public int getSourceCount() {

            return srcs.length;

        }

        @Override
        public BTree getMutableBTree() {

            return btree;

        }

        @Override
        final public AbstractBTree[] getSources() {

            // Hand back a copy so the caller can not modify our array.
            final AbstractBTree[] copy = srcs.clone();

            return copy;

        }

        @Override
        public Iterator<AbstractBTree> iterator() {

            return Arrays.<AbstractBTree> asList(srcs).iterator();

        }

    }

    /**
     * {@link ISources} implementation which holds a hard reference to the
     * mutable {@link BTree} and to any other {@link BTree} in the view, and a
     * hard reference to the {@link IndexSegmentStore} (rather than the
     * {@link IndexSegment}) for each non-{@link BTree} source. The
     * {@link IndexSegment} is obtained from its store each time the sources
     * are materialized.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    private static class WeakRefSources implements ISources {

        /**
         * The #of sources.
         */
        private final int count;

        /**
         * The mutable {@link BTree}, which is the element at index zero of
         * the sources specified to the ctor.
         */
        private final BTree btree;

        /**
         * Parallel to the sources: the {@link BTree} at each position which
         * was a {@link BTree} and <code>null</code> elsewhere.
         */
        private final BTree[] btreeSources;

        /**
         * Parallel to the sources: the {@link IndexSegmentStore} at each
         * position which was NOT a {@link BTree} and <code>null</code>
         * elsewhere.
         */
        private final IndexSegmentStore[] segmentStores;

        /**
         * @param a
         *            The ordered sources. They are validated using
         *            {@link FusedView#checkSources(AbstractBTree[])}.
         */
        public WeakRefSources(final AbstractBTree[] a) {

            checkSources(a);

            final int n = a.length;

            this.count = n;

            this.btree = (BTree) a[0];

            final BTree[] trees = new BTree[n];

            final IndexSegmentStore[] stores = new IndexSegmentStore[n];

            for (int i = 0; i < n; i++) {

                final AbstractBTree src = a[i];

                if (src instanceof BTree) {

                    trees[i] = (BTree) src;

                    continue;

                }

                // Anything else is treated as an index segment: keep its store.
                stores[i] = ((IndexSegment) src).getStore();

            }

            this.btreeSources = trees;

            this.segmentStores = stores;

        }

        @Override
        public int getSourceCount() {

            return count;

        }

        @Override
        public BTree getMutableBTree() {

            return btree;

        }

        @Override
        public AbstractBTree[] getSources() {

            final AbstractBTree[] view = new AbstractBTree[count];

            for (int i = 0; i < count; i++) {

                final BTree tree = btreeSources[i];

                if (tree != null) {

                    view[i] = tree;

                    continue;

                }

                /*
                 * Note: This provides a canonicalizing mapping using a weak
                 * reference and thereby decouples the FusedView from a hard
                 * reference to the IndexSegment.
                 */
                view[i] = segmentStores[i].loadIndexSegment();

            }

            return view;

        }

        @Override
        public Iterator<AbstractBTree> iterator() {

            final AbstractBTree[] view = getSources();

            return Arrays.asList(view).iterator();

        }

    }
    
    /**
     * The ordered sources for the view. Assigned once by the constructor.
     */
    private final ISources sources;
    
    /**
     * The ordered sources of the view, as reported by the underlying
     * {@link ISources}.
     */
    @Override
    final public AbstractBTree[] getSources() {

        final AbstractBTree[] a = sources.getSources();

        return a;

    }

    /**
     * The #of sources in the view.
     */
    @Override
    final public int getSourceCount() {

        final int n = sources.getSourceCount();

        return n;

    }
    
    /**
     * The mutable {@link BTree} of the view, as reported by the underlying
     * {@link ISources}.
     */
    @Override
    final public BTree getMutableBTree() {

        final BTree mutable = sources.getMutableBTree();

        return mutable;

    }
    
//    /**
//     * A {@link ThreadLocal} {@link Tuple} that is used to copy the value
//     * associated with a key out of the btree during lookup operations.
//     * <p>
//     * Note: This field is NOT static. This limits the scope of the
//     * {@link ThreadLocal} {@link Tuple} to the containing {@link FusedView}
//     * instance.
//     */
//    protected final ThreadLocal<Tuple> lookupTuple = new ThreadLocal<Tuple>() {
//
//        @Override
//        protected Tuple initialValue() {
//
//            return new Tuple(getMutableBTree(),VALS);
//
//        }
//
//    };
//    
//    /**
//     * A {@link ThreadLocal} {@link Tuple} that is used for contains() tests.
//     * The tuple does not copy either the keys or the values. Contains is
//     * implemented as a lookup operation that either return this tuple or
//     * <code>null</code>. When isolation is supported, the version metadata
//     * is examined to determine if the matching entry is flagged as deleted in
//     * which case contains() will report "false".
//     * <p>
//     * Note: This field is NOT static. This limits the scope of the
//     * {@link ThreadLocal} {@link Tuple} to the containing {@link FusedView}
//     * instance.
//     */
//    protected final ThreadLocal<Tuple> containsTuple = new ThreadLocal<Tuple>() {
//
//        @Override
//        protected com.bigdata.btree.Tuple initialValue() {
//
//            return new Tuple(getMutableBTree(), 0);
//            
//        }
//        
//    };
    
    /**
     * Renders the simple class name followed by the sources of the view.
     */
    @Override
    public String toString() {

        return getClass().getSimpleName() + "{ "
                + Arrays.toString(getSources()) + "}";

    }
    
    /**
     * Verifies that the view can be written on.
     * 
     * @throws IllegalStateException
     *             if the mutable {@link BTree} reports that it is read-only.
     */
    protected void assertNotReadOnly() {

        final boolean readOnly = getMutableBTree().isReadOnly();

        if (!readOnly) {

            return;

        }

        // Can't write on this view.
        throw new IllegalStateException();

    }
    
    /**
     * Returns the {@link IResourceMetadata} of the store for each source, in
     * the order of the sources.
     */
    @Override
    public IResourceMetadata[] getResourceMetadata() {

        final IResourceMetadata[] resources = new IResourceMetadata[getSourceCount()];

        final Iterator<AbstractBTree> itr = sources.iterator();

        for (int i = 0; itr.hasNext(); i++) {

            resources[i] = itr.next().getStore().getResourceMetadata();

        }

        return resources;

    }

    /**
     * Creates a view over exactly two sources by delegating to
     * {@link #FusedView(AbstractBTree[])} with an array holding
     * <i>src1</i> followed by <i>src2</i>.
     * 
     * @param src1
     *            The first source in the view.
     * @param src2
     *            The second source in the view.
     */
    public FusedView(final AbstractBTree src1, final AbstractBTree src2) {

        this(new AbstractBTree[] { src1, src2 });

    }

    /**
     * 
     * @param srcs
     *            The ordered sources for the fused view. The order of the
     *            elements in this array determines which value will be selected
     *            for a given key by lookup() and which value is retained by
     *            rangeQuery().
     * 
     * @exception IllegalArgumentException
     *                if a source is used more than once.
     * @exception IllegalArgumentException
     *                unless all sources have the same indexUUID
     * @exception IllegalArgumentException
     *                unless all sources support delete markers.
     */
    public FusedView(final AbstractBTree[] srcs) {

        /*
         * Note: This has been abstracted and modified to NOT hold a hard
         * reference to the index segments in the view so that they may be
         * closed even while the view itself is open. This was done in an
         * attempt to reduce the memory demand associated with open index
         * segments.
         */
        
        if(false) {
        
            // hard reference to each source in the view.
            sources = new HardRefSources(srcs);
            
        } else {
            
            // hard reference only to BTrees in the view.
            sources = new WeakRefSources(srcs);
            
        }
        
    }
    
    /**
     * Validates the sources for a view: the array must be non-null with at
     * least two elements, each element must be non-null and must maintain
     * delete markers (required for views), no source may appear twice, and
     * all sources must have the same index UUID.
     * 
     * @param srcs
     *            The sources for a view.
     * 
     * @throws IllegalArgumentException
     *             if any of those conditions is violated.
     */
    static void checkSources(final AbstractBTree[] srcs) {

        if (srcs == null) {

            throw new IllegalArgumentException("sources is null");

        }

        /*
         * @todo allow this as a degenerate case, or create a factory that
         * produces the appropriate view?
         */
        if (srcs.length < 2) {

            throw new IllegalArgumentException(
                    "At least two sources are required");

        }

        int i = 0;

        for (final AbstractBTree src : srcs) {

            if (src == null) {

                throw new IllegalArgumentException("Source null @ index=" + i);

            }

            if (!src.getIndexMetadata().getDeleteMarkers()) {

                throw new IllegalArgumentException(
                        "Source does not maintain delete markers @ index=" + i);

            }

            // Compare against each source which comes before this one.
            for (int j = 0; j < i; j++) {

                final AbstractBTree prior = srcs[j];

                if (src == prior) {

                    throw new IllegalArgumentException(
                            "Source used more than once");

                }

                final boolean sameIndex = src.getIndexMetadata().getIndexUUID()
                        .equals(prior.getIndexMetadata().getIndexUUID());

                if (!sameIndex) {

                    throw new IllegalArgumentException(
                            "Sources have different index UUIDs @ index=" + i);

                }

            }

            i++;

        }

    }

    /**
     * The {@link IndexMetadata} of the mutable {@link BTree} (the first
     * source in the view).
     */
    @Override
    public IndexMetadata getIndexMetadata() {

        final BTree mutable = getMutableBTree();

        return mutable.getIndexMetadata();

    }
    
     
//    public IBloomFilter getBloomFilter() {
//
//        // double checked locking.
//        if (bloomFilter == null) {
//
//            synchronized (this) {
//                if (noBloom)
//                    return null;
//                for (AbstractBTree tree : getSources()) {
//                    if (tree.getBloomFilter() == null) {
//                        noBloom = true;
//                        return null;
//                    }
//                }
//                bloomFilter = new FusedBloomFilter();
//
//            }
//
//        }
//
//        return bloomFilter;
//
//    }
//
//    private volatile boolean noBloom = false;

    /**
     * Returns the bloom filter for the view, creating it on first use.
     * <p>
     * The filter is created lazily. The field is tested once without holding
     * the lock and then again while synchronized on this view, so that
     * threads racing on the first call all observe the same instance rather
     * than each replacing the filter published by the other.
     * 
     * @return The {@link FusedBloomFilter} for this view (never
     *         <code>null</code>).
     */
    @Override
    public IBloomFilter getBloomFilter() {

        // First check: a single read of the volatile field, without the lock.
        IBloomFilter filter = bloomFilter;

        if (filter == null) {

            synchronized (this) {

                // Second check: another thread may have created it while we
                // were waiting for the lock.
                filter = bloomFilter;

                if (filter == null) {

                    bloomFilter = filter = new FusedBloomFilter();

                }

            }

        }

        return filter;

    }

    /**
     * The bloom filter for the view. Initially <code>null</code>; assigned by
     * {@link #getBloomFilter()} when the filter is first requested.
     */
    private volatile IBloomFilter bloomFilter = null;

    /**
     * Returns a new {@link CounterSet} in which the counters for the source
     * at position <code>i</code> are attached under the path
     * <code>view[i]</code>.
     */
    @Override
    final public CounterSet getCounters() {

        final CounterSet root = new CounterSet();

        final Iterator<AbstractBTree> itr = sources.iterator();

        for (int i = 0; itr.hasNext(); i++) {

            final AbstractBTree t = itr.next();

            root.makePath("view[" + i + "]").attach(t.getCounters());

        }

        return root;

    }

    /**
     * The counter of the mutable {@link BTree}, which is the first source in
     * the view.
     */
    @Override
    public ICounter getCounter() {

        final BTree mutable = getMutableBTree();

        return mutable.getCounter();

    }
    
    /**
     * {@inheritDoc}
     * <p>
     * The prior value is resolved against the whole view; the write itself is
     * applied to the mutable {@link BTree} (the first of the sources
     * specified to the ctor).
     */
    @Override
    public byte[] insert(final byte[] key, final byte[] value) {

        // what the view had under that key before the write.
        final byte[] previous = lookup(key);

        final BTree mutable = getMutableBTree();

        mutable.insert(key, value);

        return previous;

    }
    
    /**
     * {@inheritDoc}
     * <p>
     * Since the value stored under a key may itself be null, the decision is
     * made using the Tuple for the key as resolved against the view rather
     * than using the value. When there is a non-deleted tuple its value
     * (which COULD be null) is returned and nothing is written. Otherwise
     * (no tuple, or a deleted tuple) the value is inserted unconditionally
     * into the mutable {@link BTree} and null is returned.
     */
    @Override
    public byte[] putIfAbsent(final byte[] key, final byte[] value) {

        final BTree mutable = getMutableBTree();

        final Tuple tuple = lookup(key, mutable.getLookupTuple());

        final boolean present = tuple != null && !tuple.isDeletedVersion();

        if (present) {

            // return the pre-existing value under the key.
            return tuple.getValue();

        }

        /*
         * Interpret a deletion marker as "not found": unconditional insert.
         */
        mutable.insert(key, value);

        // nothing was in the index under that key.
        return null;

    }
    
    /**
     * Object-based insert. The key and value are serialized using the
     * {@link ITupleSerializer} for the index, the prior entry is resolved
     * against the view, and the write is directed to the mutable
     * {@link BTree} (the first source).
     * 
     * @param key
     *            The application key.
     * @param val
     *            The application value.
     * 
     * @return The de-serialized object from the prior entry under that key in
     *         the view -or- <code>null</code> if there was no entry under
     *         that key for any source or the entry was marked as deleted.
     */
    public Object insert(Object key, Object val) {

        key = getTupleSerializer().serializeKey(key);

        val = getTupleSerializer().serializeVal(val);

        final ITuple tuple = lookup((byte[]) key, getMutableBTree().getLookupTuple());

        /*
         * Resolve the old value BEFORE writing. The tuple is the lookup tuple
         * owned by the mutable BTree, so it is read before that BTree is
         * modified (the same order used by remove(byte[])) rather than
         * relying on the write leaving the tuple untouched.
         * 
         * Either there was no entry under that key for any source or the
         * entry is already marked as deleted in the view => null.
         */
        final Object oldval = (tuple == null || tuple.isDeletedVersion()) ? null
                : tuple.getObject();

        // direct the write to the first source.
        getMutableBTree().insert((byte[]) key, (byte[]) val);

        return oldval;

    }

    /**
     * Resolves the old value against the view and then directs the remove to
     * the mutable {@link BTree} (the first of the sources specified to the
     * ctor). The remove is in fact treated as writing a deleted marker into
     * the index.
     * 
     * @return The old value under the key -or- <code>null</code> if there
     *         was no entry under that key for any source or the entry was
     *         already marked as deleted (in which case nothing is written).
     */
    public byte[] remove(final byte[] key) {

        final BTree mutable = getMutableBTree();

        final Tuple tuple = lookup(key, mutable.getLookupTuple());

        if (tuple != null && !tuple.isDeletedVersion()) {

            // copy out the old value first.
            final byte[] oldval = tuple.getValue();

            // remove from the 1st source.
            mutable.remove(key);

            return oldval;

        }

        /*
         * Slight optimization: do not write on the index when there is no
         * entry under that key for any source or when the entry is already
         * marked as deleted in the view.
         */
        return null;

    }
    
    /**
     * Object-based remove. The key is serialized using the
     * {@link ITupleSerializer} for the index, the prior entry is resolved
     * against the view, and the remove is directed to the mutable
     * {@link BTree} (the first source) using the serialized key.
     * 
     * @param key
     *            The application key.
     * 
     * @return The de-serialized object from the prior entry under that key in
     *         the view -or- <code>null</code> if there was no entry under
     *         that key for any source or the entry was already marked as
     *         deleted (in which case nothing is written).
     */
    public Object remove(Object key) {

        key = getTupleSerializer().serializeKey(key);

        /*
         * Slight optimization prevents remove() from writing on the index if
         * there is no entry under that key for any source (or if there is
         * already a deleted entry under that key).
         */
        final Tuple tuple = lookup((byte[]) key, getMutableBTree().getLookupTuple());

        if (tuple == null || tuple.isDeletedVersion()) {

            /*
             * Either there was no entry under that key for any source or the
             * entry is already marked as deleted in the view so we are done.
             */

            return null;

        }

        /*
         * Resolve the old object BEFORE writing. The tuple is the lookup
         * tuple owned by the mutable BTree, so it is read before that BTree
         * is modified (the same order used by remove(byte[])).
         */
        final Object oldval = tuple.getObject();

        /*
         * Remove from the 1st source. The key has already been serialized, so
         * it is cast to byte[] in order to select the byte[] variant of
         * remove(). Without the cast the Object variant is selected at
         * compile time since [key] is declared as an Object.
         */
        getMutableBTree().remove((byte[]) key);

        return oldval;

    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns the value from the first source, in the order of the view,
     * which has an entry for the key. A deleted entry is reported as
     * <code>null</code>.
     */
    @Override
    final public byte[] lookup(final byte[] key) {

        final Tuple tuple = lookup(key, getMutableBTree().getLookupTuple());

        // Interpret a deletion marker as "not found".
        final boolean found = tuple != null && !tuple.isDeletedVersion();

        return found ? tuple.getValue() : null;

    }

    /**
     * Object-based lookup. The key is serialized using the
     * {@link ITupleSerializer} for the index and resolved against the view.
     * 
     * @return The de-serialized object for the entry -or- <code>null</code>
     *         if there is no entry under the key or the entry is deleted.
     */
    @Override
    public Object lookup(Object key) {

        final byte[] serializedKey = (byte[]) getTupleSerializer()
                .serializeKey(key);

        final Tuple tuple = lookup(serializedKey, getMutableBTree()
                .getLookupTuple());

        // Interpret a deletion marker as "not found".
        final boolean found = tuple != null && !tuple.isDeletedVersion();

        return found ? tuple.getObject() : null;

    }

    /**
     * Per {@link AbstractBTree#lookup(byte[], Tuple)} but processes the
     * {@link AbstractBTree}s in the view in their declared sequence and stops
     * when it finds the first index entry for the key, even if the entry is
     * marked as deleted for that key. This delegates to
     * {@link #lookup(int, byte[], Tuple)} with a start index of zero.
     * 
     * @param key
     *            The search key.
     * @param tuple
     *            A tuple to be populated with data and metadata about the index
     *            entry (required).
     * 
     * @return <i>tuple</i> iff an index entry was found under that <i>key</i>.
     */
    final public Tuple lookup(final byte[] key, final Tuple tuple) {

        // begin with the first source in the view.
        final int firstSource = 0;

        return lookup(firstSource, key, tuple);

    }

    /**
     * Core implementation processes the {@link AbstractBTree}s in the view in
     * their declared sequence, beginning with the source at
     * <i>startIndex</i>, and stops when it finds the first index entry for
     * the key, even if the entry is marked as deleted for that key.
     * 
     * @param startIndex
     *            The index of the first source to be read. This permits the
     *            lookup operation to start at an index into the sources (see
     *            {@link #getSources()}) other than zero. This is used by
     *            {@link IsolatedFusedView} to read from just the groundState
     *            (everything except the writeSet, which is the source at
     *            index zero(0)). When the index is greater than or equal to
     *            the #of sources, no source is read.
     * @param key
     *            The search key.
     * @param tuple
     *            A tuple to be populated with data and metadata about the index
     *            entry (required).
     * 
     * @return <i>tuple</i> iff an index entry was found under that <i>key</i>
     *         in one of the sources at or after <i>startIndex</i>.
     * 
     * @throws IllegalArgumentException
     *             if <i>startIndex</i> is negative.
     */
    final protected Tuple lookup(final int startIndex, final byte[] key,
            final Tuple tuple) {

        if (startIndex < 0)
            throw new IllegalArgumentException("startIndex=" + startIndex);

        final AbstractBTree[] srcs = getSources();

        // Note: sources before [startIndex] are deliberately NOT consulted.
        for (int i = startIndex; i < srcs.length; i++) {

            if (srcs[i].lookup(key, tuple) == null) {

                // No match yet.

                continue;

            }

            return tuple;

        }

        // no match.

        return null;

    }
    
	/**
	 * {@inheritDoc}
	 * <p>
	 * The sources are processed in the order of the view. The result is true
	 * iff the first source having an index entry under the key has a
	 * non-deleted entry for that key.
	 */
    @Override
    final public boolean contains(final byte[] key) {

        final Tuple tuple = lookup(key, getMutableBTree().getContainsTuple());

        // Interpret a deletion marker as "not found".
        return tuple != null && !tuple.isDeletedVersion();

    }

    /**
     * Object-based variant: serializes the key using the
     * {@link ITupleSerializer} for the index and delegates to
     * {@link #contains(byte[])}.
     */
    @Override
    public boolean contains(Object key) {

        final byte[] serializedKey = (byte[]) getTupleSerializer()
                .serializeKey(key);

        return contains(serializedKey);

    }

    /**
     * The {@link ITupleSerializer} declared by the {@link IndexMetadata} for
     * the view.
     */
    private ITupleSerializer getTupleSerializer() {

        final IndexMetadata metadata = getIndexMetadata();

        return metadata.getTupleSerializer();

    }
    
    /**
     * {@inheritDoc}
     * <p>
     * Delegates to {@link #rangeCount(byte[], byte[])} without any key
     * constraint. The result is the sum of the range count on each index in
     * the view, which is the maximum #of entries that could lie within that
     * key range. The actual number could be less if there are entries for the
     * same key in more than one source index.
     */
    @Override
    final public long rangeCount() {

        final byte[] fromKey = null;

        final byte[] toKey = null;

        return rangeCount(fromKey, toKey);

    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns the sum of the range count on each index in the view. This is the
     * maximum #of entries that could lie within that key range. However, the
     * actual number could be less if there are entries for the same key in more
     * than one source index. If the sum overflows, a warning is logged and
     * {@link Long#MAX_VALUE} is returned.
     * 
     * @todo this could be done using concurrent threads.
     */
    @Override
    final public long rangeCount(byte[] fromKey, byte[] toKey) {

        if (fromKey == null || toKey == null) {

            /*
             * Note: When an index partition is split, the new index partitions
             * will initially use the same source index segments as the original
             * index partition. Therefore we MUST impose an explicit constraint
             * on the fromKey / toKey if none is given so that we do not read
             * tuples lying outside of the index partition boundaries! However,
             * if there is only a BTree in the view then the partition metadata
             * might not be defined, so we check for that first.
             * 
             * TODO Review this assertion and optimize range count which span a
             * shard view. See
             * http://sourceforge.net/apps/trac/bigdata/ticket/470 (Optimize
             * range counts on cluster)
             */

            final LocalPartitionMetadata pmd = getIndexMetadata()
                    .getPartitionMetadata();

            if (pmd != null) {

                fromKey = (fromKey != null) ? fromKey : pmd
                        .getLeftSeparatorKey();

                toKey = (toKey != null) ? toKey : pmd.getRightSeparatorKey();

            }

        }

        long total = 0;

        final Iterator<AbstractBTree> itr = sources.iterator();

        while (itr.hasNext()) {

            final long sum = total + itr.next().rangeCount(fromKey, toKey);

            if (sum < total) {

                // the addition wrapped around.
                log.warn(ERR_RANGE_COUNT_EXCEEDS_MAX_LONG);

                return Long.MAX_VALUE;

            }

            total = sum;

        }

        return total;

    }

    /**
     * {@inheritDoc}
     * <p>
     * The exact range count is obtained by counting the tuples visited by a
     * key-range scan over the view.
     */
    @Override
    final public long rangeCountExact(byte[] fromKey, byte[] toKey) {

        if (fromKey == null || toKey == null) {

            /*
             * Note: When an index partition is split, the new index partitions
             * will initially use the same source index segments as the original
             * index partition. Therefore we MUST impose an explicit constraint
             * on the fromKey / toKey if none is given so that we do not read
             * tuples lying outside of the index partition boundaries! However,
             * if there is only a BTree in the view then the partition metadata
             * might not be defined, so we check for that first.
             */

            final LocalPartitionMetadata pmd = getIndexMetadata()
                    .getPartitionMetadata();

            if (pmd != null) {

                fromKey = (fromKey != null) ? fromKey : pmd
                        .getLeftSeparatorKey();

                toKey = (toKey != null) ? toKey : pmd.getRightSeparatorKey();

            }

        }

        final ITupleIterator itr = rangeIterator(fromKey, toKey,
                0/* capacity */, 0/* flags */, null/* filter */);

        long n = 0;

        for (; itr.hasNext(); n++) {

            itr.next();

            if (n == Long.MAX_VALUE) {

                throw new RuntimeException(ERR_RANGE_COUNT_EXCEEDS_MAX_LONG);

            }

        }

        return n;

    }
    
    /**
     * {@inheritDoc}
     * <p>
     * An exact range count that includes any deleted tuples. It is obtained
     * by counting the tuples visited by a key-range scan over the view with
     * the {@link IRangeQuery#DELETED} flag set.
     * 
     * @see #rangeCountExact(byte[], byte[])
     */
    @Override
    public long rangeCountExactWithDeleted(byte[] fromKey, byte[] toKey) {

        if (fromKey == null || toKey == null) {

            /*
             * Note: When an index partition is split, the new index partitions
             * will initially use the same source index segments as the original
             * index partition. Therefore we MUST impose an explicit constraint
             * on the fromKey / toKey if none is given so that we do not read
             * tuples lying outside of the index partition boundaries! However,
             * if there is only a BTree in the view then the partition metadata
             * might not be defined, so we check for that first.
             */

            final LocalPartitionMetadata pmd = getIndexMetadata()
                    .getPartitionMetadata();

            if (pmd != null) {

                fromKey = (fromKey != null) ? fromKey : pmd
                        .getLeftSeparatorKey();

                toKey = (toKey != null) ? toKey : pmd.getRightSeparatorKey();

            }

        }

        // set the DELETED flag so we also see the deleted tuples.
        final Iterator<?> itr = rangeIterator(fromKey, toKey, 0/* capacity */,
                IRangeQuery.DELETED/* flags */, null/* filter */);

        long n = 0L;

        for (; itr.hasNext(); n++) {

            itr.next();

            if (n == Long.MAX_VALUE) {

                throw new RuntimeException(ERR_RANGE_COUNT_EXCEEDS_MAX_LONG);

            }

        }

        return n;

    }

    /**
     * Visits the entries of the view without any key constraint by
     * delegating to {@link #rangeIterator(byte[], byte[])}.
     */
    @Override
    public ITupleIterator rangeIterator() {

        final byte[] noBound = null;

        return rangeIterator(noBound/* fromKey */, noBound/* toKey */);

    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns an iterator that visits the distinct entries. When an entry
     * appears in more than one index, the entry is chosen based on the order
     * in which the indices were declared to the constructor. This delegates
     * to the core implementation with a capacity of zero, the
     * <code>DEFAULT</code> flags and no filter.
     */
    @Override
    final public ITupleIterator rangeIterator(final byte[] fromKey,
            final byte[] toKey) {

        final int capacity = 0;

        final IFilter filter = null;

        return rangeIterator(fromKey, toKey, capacity, DEFAULT/* flags */,
                filter);

    }

    /**
     * <p>
     * Core implementation.
     * </p>
     * <p>
     * Note: The {@link FusedView}'s iterator first obtains an ordered array of
     * iterators for each of the source {@link AbstractBTree}s. The <i>filter</i>
     * is NOT passed through to these source iterators. Instead, an
     * {@link FusedTupleIterator} is obtained and the filter is applied to that
     * iterator. This means that filters always see a fused representation of
     * the source iterators.
     * </p>
     * <p>
     * Note: This implementation supports {@link IRangeQuery#REVERSE}. This may
     * be used to locate the {@link ITuple} before a specified key, which is a
     * requirement for several aspects of the overall architecture including
     * atomic append of file blocks, locating an index partition in the metadata
     * index, and finding the last member of a set or map.
     * </p>
     * <p>
     * Note: When the {@link IRangeQuery#CURSOR} flag is specified, it is passed
     * through and an {@link ITupleCursor} is obtained for each source
     * {@link AbstractBTree}. A {@link FusedTupleCursor} is then obtained which
     * implements the {@link ITupleCursor} extensions. The same cursor based
     * path is also taken when either {@link IRangeQuery#REVERSE} or
     * {@link IRangeQuery#REMOVEALL} is specified.
     * </p>
     * 
     * @param fromKey
     *            The first key to visit. When <code>null</code> and the
     *            partition metadata is defined, the left separator key of the
     *            index partition is used instead.
     * @param toKey
     *            The upper bound of the key range. When <code>null</code> and
     *            the partition metadata is defined, the right separator key of
     *            the index partition is used instead.
     * @param capacity
     *            The capacity, which is passed through to each source
     *            iterator.
     * @param flags
     *            The {@link IRangeQuery} flags.
     * @param filter
     *            An optional filter which is layered over the fused iterator
     *            (after any reverse scan and before any REMOVEALL handling).
     * 
     * @return The iterator for the fused view.
     * 
     * @throws IllegalArgumentException
     *             if both READONLY and REMOVEALL are specified.
     */
    @Override
    @SuppressWarnings("unchecked")
    public ITupleIterator rangeIterator(//
            byte[] fromKey,//
            byte[] toKey, //
            final int capacity, //
            final int flags,//
            final IFilter filter//
            ) {

        if (fromKey == null || toKey == null) {

            /*
             * Note: When an index partition is split, the new index partitions
             * will initially use the same source index segments as the original
             * index partition. Therefore we MUST impose an explicit constraint
             * on the fromKey / toKey if none is given so that we do not read
             * tuples lying outside of the index partition boundaries! However,
             * if there is only a BTree in the view then the partition metadata
             * might not be defined, so we check for that first.
             */

            final LocalPartitionMetadata pmd = getIndexMetadata()
                    .getPartitionMetadata();

            if (pmd != null) {

                if (fromKey == null) {

                    fromKey = pmd.getLeftSeparatorKey();

                }

                if (toKey == null) {

                    toKey = pmd.getRightSeparatorKey();

                }

            }

        }

        // reverse scan?
        final boolean reverseScan = (flags & REVERSE) != 0;

        // cursor requested?
        final boolean cursorMode = (flags & CURSOR) != 0;

        // read only?
        final boolean readOnly = ((flags & READONLY) != 0);

        // iff the aggregate iterator should visit deleted entries.
        final boolean deleted = (flags & DELETED) != 0;

        // removeAll?
        final boolean removeAll = (flags & REMOVEALL) != 0;

        if (readOnly && removeAll) {

            // REMOVEALL is not compatible with READONLY.
            throw new IllegalArgumentException(
                    "REMOVEALL is not compatible with READONLY: flags="
                            + flags);

        }

        final int n = sources.getSourceCount();

        if (log.isInfoEnabled())
            log.info("nsrcs=" + n + ", flags=" + flags + ", readOnly="
                    + readOnly + ", deleted=" + deleted + ", reverseScan="
                    + reverseScan);

        /*
         * Note: We request KEYS since we need to compare the keys in order to
         * decide which tuple to return next.
         * 
         * Note: We request DELETED so that we will see deleted entries. This is
         * necessary in order for processing to stop at the first entry for a
         * given key regardless of whether it is deleted or not. If the caller
         * does not want to see the deleted tuples, then they are silently
         * dropped from the aggregate iterator.
         * 
         * Note: The [filter] is NOT passed through to the source iterators.
         * This is because the filter must be applied to the aggregate iterator
         * in order to operate on the fused view.
         * 
         * Note: The REVERSE flag is NOT passed through to the source iterators.
         * It is handled below by layering on a filter.
         * 
         * Note: The REMOVEALL flag is NOT passed through to the source
         * iterators. It is handled below by layering on a filter.
         * 
         * Note: CURSOR is forced on for the source iterators whenever REVERSE
         * or REMOVEALL was requested since both are handled over cursors.
         */
        final int sourceFlags = (//
                (flags | KEYS | DELETED)//
                | (reverseScan || removeAll ? CURSOR : 0) //
                )//
                & (~REMOVEALL)// turn off
                & (~REVERSE)// turn off
                ;

        /*
         * The source iterator produces a fused view of the source indices. We
         * then layer the filter(s) over the fused view iterator. A subclass is
         * used if CURSOR support is required for the fused view.
         */
        ITupleIterator src;

        if (cursorMode || removeAll || reverseScan) {

            /*
             * CURSOR was specified for the aggregate iterator or is required to
             * support REMOVEALL and/or REVERSE.
             */

            final ITupleCursor[] itrs = new ITupleCursor[n];

            int i = 0;
            for (AbstractBTree t : sources) {

                itrs[i++] = (ITupleCursor) t.rangeIterator(fromKey, toKey,
                        capacity, sourceFlags, null/* filter */);

            }

            // Note: aggregate source implements ITupleCursor.
            src = new FusedTupleCursor(flags, deleted, itrs,
                    readOnly ? new ReadOnlyIndex(this) : this);

        } else {

            /*
             * CURSOR was neither specified nor required for the aggregate
             * iterator.
             * 
             * Note: Neither REVERSE nor REMOVEALL can be set on this code path
             * since either flag forces the cursor based code path above.
             */

            final ITupleIterator[] itrs = new ITupleIterator[n];

            int i = 0;
            for (AbstractBTree t : sources) {

                itrs[i++] = t.rangeIterator(fromKey, toKey, capacity,
                        sourceFlags, null/* filter */);

            }

            src = new FusedTupleIterator(flags, deleted, itrs);

        }

        if (reverseScan) {

            /*
             * Reverse scan iterator.
             * 
             * Note: The reverse scan MUST be layered directly over the
             * ITupleCursor. Most critically, REMOVEALL combined with a REVERSE
             * scan needs to process the tuples in reverse index order and then
             * delete them as it goes.
             */

            src = new Reverserator((ITupleCursor) src);

        }

        if (filter != null) {

            /*
             * Apply the optional filter.
             * 
             * Note: This needs to be after the reverse scan and before
             * REMOVEALL (those are the assumptions for the flags).
             */

            src = new WrappedTupleIterator(filter
                    .filter(src, null/* context */));

        }

        if (removeAll) {

            assertNotReadOnly();

            /*
             * Note: This iterator removes each tuple that it visits from the
             * source iterator.
             */

            src = new TupleRemover() {
                private static final long serialVersionUID = 1L;
                @Override
                protected boolean remove(ITuple e) {
                    // remove all visited tuples.
                    return true;
                }
            }.filterOnce(src, null/* context */);

        }

        return src;

    }
    
    /**
     * Applies the procedure to this view and returns whatever the procedure
     * returns. The <i>key</i> is not consulted by this implementation.
     * 
     * @param key
     *            The key (not used here).
     * @param proc
     *            The procedure to be applied.
     * 
     * @return The outcome reported by the procedure.
     */
    @Override
    final public <T> T submit(final byte[] key,
            final ISimpleIndexProcedure<T> proc) {

        // The procedure runs directly against the fused view.
        final T outcome = proc.apply(this);

        return outcome;

    }

    /**
     * Applies the key-range procedure to this view and, if a handler was
     * given, hands it the result.
     * 
     * @param fromKey
     *            The lower bound of the key range. When <code>null</code> and
     *            the partition metadata is defined, the left separator key of
     *            the index partition is substituted.
     * @param toKey
     *            The upper bound of the key range. When <code>null</code> and
     *            the partition metadata is defined, the right separator key of
     *            the index partition is substituted.
     * @param proc
     *            The procedure, which is applied to this view.
     * @param handler
     *            The optional handler which receives the result of the
     *            procedure.
     */
    @Override
    @SuppressWarnings("unchecked")
    final public void submit(byte[] fromKey, byte[] toKey,
            final IKeyRangeIndexProcedure proc, final IResultHandler handler) {

        if (fromKey == null || toKey == null) {

            /*
             * Note: When an index partition is split, the new index partitions
             * will initially use the same source index segments as the original
             * index partition. Therefore we MUST impose an explicit constraint
             * on the fromKey/toKey if none is given so that we do not read
             * tuples lying outside of the index partition boundaries! However,
             * if there is only a BTree in the view then the partition metadata
             * might not be defined, so we check for that first rather than
             * failing with a NullPointerException.
             */

            final LocalPartitionMetadata pmd = getIndexMetadata()
                    .getPartitionMetadata();

            if (pmd != null) {

                if (fromKey == null) {

                    fromKey = pmd.getLeftSeparatorKey();

                }

                if (toKey == null) {

                    toKey = pmd.getRightSeparatorKey();

                }

            }

        }

        /*
         * NOTE(review): the (possibly defaulted) fromKey/toKey are not handed
         * to the procedure by this method. Presumably the procedure carries
         * its own key range - confirm against IKeyRangeIndexProcedure.
         */
        final Object result = proc.apply(this);

        if (handler != null) {

            // There is no split of the keys for a key-range procedure.
            handler.aggregate(result, new Split(null,0,0));

        }

    }
    
    /**
     * Creates a procedure instance from the constructor for the given slice
     * of the keys/vals, applies it to this view and, if an aggregator was
     * given, reports the result together with a {@link Split} describing the
     * slice.
     * 
     * @param fromIndex
     *            The first index of the slice (passed to the constructor and
     *            the {@link Split}).
     * @param toIndex
     *            The last index of the slice (passed to the constructor and
     *            the {@link Split}).
     * @param keys
     *            The keys.
     * @param vals
     *            The values.
     * @param ctor
     *            The factory for the procedure instance.
     * @param aggregator
     *            The optional handler which receives the result.
     */
    @Override
    @SuppressWarnings("unchecked")
    public void submit(final int fromIndex, final int toIndex,
            final byte[][] keys, final byte[][] vals,
            final AbstractKeyArrayIndexProcedureConstructor ctor,
            final IResultHandler aggregator) {

        // Instantiate the procedure and run it against the fused view.
        final Object outcome = ctor.newInstance(this, fromIndex, toIndex,
                keys, vals).apply(this);

        if (aggregator == null) {

            // Nobody is interested in the result.
            return;

        }

        final Split slice = new Split(null, fromIndex, toIndex);

        aggregator.aggregate(outcome, slice);

    }

    /**
     * Inner class which presents the optional bloom filters of the source
     * indices of the view as a single {@link IBloomFilter}.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    protected class FusedBloomFilter implements IBloomFilter {

        /**
         * Not supported by the fused filter.
         * 
         * @throws UnsupportedOperationException
         *             always.
         */
        public boolean add(byte[] key) {

            throw new UnsupportedOperationException();

        }

        /**
         * Tests the source indices in order and returns <code>true</code> as
         * soon as ANY of them reports that it may have data for the key (in
         * which case the index must be read). A source index without a bloom
         * filter (or whose bloom filter has been disabled) is treated as if its
         * filter always reported <code>true</code>, which forces the index to
         * be checked. Such a source behaves like a filter with a high false
         * positive rate, but the test is a NOP so it is cheap.
         * <p>
         * For a {@link BTree} in the 1st or 2nd position of the view which is
         * followed by a source having a bloom filter, a real point test is
         * made against the {@link BTree} rather than consulting its filter.
         * 
         * @param key
         *            The key.
         * 
         * @return <code>false</code> iff the key is proven to not be in the
         *         view.
         */
        public boolean contains(final byte[] key) {

            final AbstractBTree[] components = getSources();

            final int ncomponents = components.length;

            for (int pos = 0; pos < ncomponents; pos++) {

                final AbstractBTree component = components[pos];

                // Resolved up front, even if the point test is used instead.
                final IBloomFilter bloom = component.getBloomFilter();

                /*
                 * Do a real point test when the 1st or 2nd component of the
                 * view is a BTree and it is followed by a component which has
                 * a bloom filter enabled. This covers the case where the BTree
                 * is absorbing writes (either isolated or unisolated, which is
                 * why the 1st or 2nd component is allowed) and there are
                 * additional components, which presumably are IndexSegments.
                 * 
                 * The reasoning is that the BTree contains test will be
                 * relatively fast and we still get to apply the bloom filters
                 * on the index segments. That avoids a disk hit in those cases
                 * where the bloom filter for the mutable index on a
                 * ManagedJournal has been turned off but the index segments
                 * have perfect bloom filters that we still want to leverage.
                 * Testing the BTree here might touch the disk, but it forces
                 * the node path into the cache, so a later 'true' response from
                 * one of the bloom filters will not add additional disk hits
                 * when the indices are tested.
                 */
                final boolean usePointTest = pos < 2 //
                        && pos + 1 < ncomponents //
                        && component instanceof BTree //
                        && components[pos + 1].getBloomFilter() != null;

                /*
                 * Point test: a hit is proven (but interpreted as a probable
                 * hit) and a miss moves on to the next source index.
                 * 
                 * Otherwise: either there is no filter, the filter is
                 * disabled, or the filter reports that it has seen the key. At
                 * the worst, this is a false positive and we will be forced to
                 * check the index.
                 */
                final boolean hit = usePointTest //
                        ? component.contains(key) //
                        : (bloom == null || bloom.contains(key));

                if (hit) {

                    return true;

                }

            }

            // proven to not be in the index.
            return false;

        }

        /**
         * Reports a false positive to the bloom filter of the mutable
         * {@link BTree} of the view (if that filter exists). Normally a false
         * positive is reported directly to the specific bloom filter instance
         * by the contains() or lookup() method of that index. However, the
         * {@link AccessPath} also tests the bloom filter and needs a means to
         * report false positives. It should be the only caller of this method
         * on this implementation class.
         */
        public void falsePos() {

            final IBloomFilter bloom = getMutableBTree().getBloomFilter();

            if (bloom == null) {

                // Nothing to notify.
                return;

            }

            bloom.falsePos();

        }

    }

}