/* Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on Feb 11, 2008 */ package com.bigdata.btree.view; import java.lang.ref.WeakReference; import java.util.Arrays; import java.util.Iterator; import org.apache.log4j.Logger; import com.bigdata.btree.AbstractBTree; import com.bigdata.btree.BTree; import com.bigdata.btree.IAutoboxBTree; import com.bigdata.btree.IBloomFilter; import com.bigdata.btree.ICounter; import com.bigdata.btree.IIndex; import com.bigdata.btree.ILinearList; import com.bigdata.btree.ILocalBTreeView; import com.bigdata.btree.IRangeQuery; import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleCursor; import com.bigdata.btree.ITupleIterator; import com.bigdata.btree.ITupleSerializer; import com.bigdata.btree.IndexMetadata; import com.bigdata.btree.IndexSegment; import com.bigdata.btree.IndexSegmentStore; import com.bigdata.btree.ReadOnlyIndex; import com.bigdata.btree.Tuple; import com.bigdata.btree.filter.Reverserator; import com.bigdata.btree.filter.TupleRemover; import com.bigdata.btree.filter.WrappedTupleIterator; import com.bigdata.btree.isolation.IsolatedFusedView; import com.bigdata.btree.proc.AbstractKeyArrayIndexProcedureConstructor; import com.bigdata.btree.proc.IKeyRangeIndexProcedure; import com.bigdata.btree.proc.IResultHandler; import com.bigdata.btree.proc.ISimpleIndexProcedure; import com.bigdata.counters.CounterSet; import com.bigdata.mdi.IResourceMetadata; import com.bigdata.mdi.LocalPartitionMetadata; import com.bigdata.relation.accesspath.AccessPath; import com.bigdata.service.MetadataService; import com.bigdata.service.Split; import cutthecrap.utils.striterators.IFilter; /** * <p> * A fused view providing read-write operations on multiple B+-Trees mapping * variable length unsigned byte[] keys to arbitrary values. The sources MUST * support deletion markers. The order of the sources MUST correspond to the * recency of their data. Writes will be directed to the first source in the * sequence (the most recent source). Deletion markers are used to prevent a * miss on a key for a source from reading through to an older source. If a * deletion marker is encountered the index entry will be understood as "not * found" in the fused view rather than reading through to an older source where * it might still have a binding. * </p> * * @todo consider implementing {@link IAutoboxBTree} here and collapsing * {@link ILocalBTreeView} and {@link IAutoboxBTree}. * * @todo Can I implement {@link ILinearList} here? That would make it possible * to use keyAt() and indexOf() and might pave the way for a * {@link MetadataService} that supports overflow since the index segments * could be transparent at that point. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> */ public class FusedView implements IIndex, ILocalBTreeView {//, IValueAge { protected static final Logger log = Logger.getLogger(FusedView.class); /** * Error message if the view has more than {@link Long#MAX_VALUE} elements * and you requested an exact range count. */ static protected transient final String ERR_RANGE_COUNT_EXCEEDS_MAX_LONG = "The range count can not be expressed as a 64-bit signed integer"; /** * Encapsulates the sources. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> */ private interface ISources extends Iterable<AbstractBTree> { /** * The mutable {@link BTree} for the view. */ public BTree getMutableBTree(); /** * The #of sources in the view. */ public int getSourceCount(); /** * Visits the sources in order. */ @Override public Iterator<AbstractBTree> iterator(); /** * Cloned copy of the sources objects. */ public AbstractBTree[] getSources(); } /** * Implementation based on a hard reference array which directly captures * the {@link AbstractBTree}[]. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> */ private static class HardRefSources implements ISources { /** * A hard reference to the mutable {@link BTree} from index zero of the * sources specified to the ctor. */ private final BTree btree; @Override public BTree getMutableBTree() { return btree; } @Override public int getSourceCount() { return srcs.length; } @Override public Iterator<AbstractBTree> iterator() { return Arrays.asList(srcs).iterator(); } /** * Holds the various btrees that are the sources for the view. * * FIXME Change this to assemble the AbstractBTree[] dynamically from the * {@link #btree} hard reference and hard references to the * {@link IndexSegmentStore} using * {@link IndexSegmentStore#loadIndexSegment()}. We could actually use hard * references for the index segments inside of a {@link WeakReference} to an * array of those references. */ private final AbstractBTree[] srcs; @Override final public AbstractBTree[] getSources() { // Note: clone the array to prevent modification. return srcs.clone(); } public HardRefSources(final AbstractBTree[] a) { checkSources(a); this.btree = (BTree) a[0]; this.srcs = a.clone(); } } /** * Implementation using a hard reference for the mutable {@link BTree} and * any other {@link BTree}s in the view and hard references to the * {@link IndexSegmentStore}s for the non-{@link BTree} sources in the * view. and hard. The {@link IndexSegmentStore} internally uses a * {@link WeakReference} to (re-)open the {@link IndexSegment} on demand. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> */ private static class WeakRefSources implements ISources { /** * The #of sources. */ private final int count; /** * A hard reference to the mutable {@link BTree} from index zero of the * sources specified to the ctor. */ private final BTree btree; /** * A hard reference to any source which was a {@link BTree} with * <code>null</code>s in the other elements of the array. */ private final BTree[] btreeSources; /** * A hard reference to the {@link IndexSegmentStore} for any source that * was an {@link IndexSegment} with <code>null</code>s in the other * elements of the array. */ private final IndexSegmentStore[] segmentStores; @Override public BTree getMutableBTree() { return btree; } @Override public int getSourceCount() { return count; } @Override public AbstractBTree[] getSources() { final AbstractBTree[] a = new AbstractBTree[count]; for (int i = 0; i < count; i++) { if (btreeSources[i] != null) { a[i] = btreeSources[i]; } else { /* * Note: This provides a canonicalizing mapping using a weak * reference and thereby decouples the FusedView from a hard * reference to the IndexSegment. */ a[i] = segmentStores[i].loadIndexSegment(); } } return a; } @Override public Iterator<AbstractBTree> iterator() { return Arrays.asList(getSources()).iterator(); } public WeakRefSources(final AbstractBTree[] a) { checkSources(a); this.count = a.length; this.btree = (BTree) a[0]; this.btreeSources = new BTree[count]; this.segmentStores = new IndexSegmentStore[count]; for (int i = 0; i < count; i++) { if (a[i] instanceof BTree) { btreeSources[i] = (BTree) a[i]; } else { segmentStores[i] = ((IndexSegment) a[i]).getStore(); } } } } private final ISources sources; @Override final public AbstractBTree[] getSources() { return sources.getSources(); } @Override final public int getSourceCount() { return sources.getSourceCount(); } @Override final public BTree getMutableBTree() { return sources.getMutableBTree(); } // /** // * A {@link ThreadLocal} {@link Tuple} that is used to copy the value // * associated with a key out of the btree during lookup operations. // * <p> // * Note: This field is NOT static. This limits the scope of the // * {@link ThreadLocal} {@link Tuple} to the containing {@link FusedView} // * instance. // */ // protected final ThreadLocal<Tuple> lookupTuple = new ThreadLocal<Tuple>() { // // @Override // protected Tuple initialValue() { // // return new Tuple(getMutableBTree(),VALS); // // } // // }; // // /** // * A {@link ThreadLocal} {@link Tuple} that is used for contains() tests. // * The tuple does not copy either the keys or the values. Contains is // * implemented as a lookup operation that either return this tuple or // * <code>null</code>. When isolation is supported, the version metadata // * is examined to determine if the matching entry is flagged as deleted in // * which case contains() will report "false". // * <p> // * Note: This field is NOT static. This limits the scope of the // * {@link ThreadLocal} {@link Tuple} to the containing {@link FusedView} // * instance. // */ // protected final ThreadLocal<Tuple> containsTuple = new ThreadLocal<Tuple>() { // // @Override // protected com.bigdata.btree.Tuple initialValue() { // // return new Tuple(getMutableBTree(), 0); // // } // // }; @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append(getClass().getSimpleName()); sb.append("{ "); sb.append(Arrays.toString(getSources())); sb.append("}"); return sb.toString(); } protected void assertNotReadOnly() { if (getMutableBTree().isReadOnly()) { // Can't write on this view. throw new IllegalStateException(); } } @Override public IResourceMetadata[] getResourceMetadata() { final int n = getSourceCount(); final IResourceMetadata[] resources = new IResourceMetadata[n]; int i = 0; for(AbstractBTree t : sources) { // for (int i = 0; i < srcs.length; i++) { resources[i++] = t.getStore().getResourceMetadata(); } return resources; } public FusedView(final AbstractBTree src1, final AbstractBTree src2) { this(new AbstractBTree[] { src1, src2 }); } /** * * @param srcs * The ordered sources for the fused view. The order of the * elements in this array determines which value will be selected * for a given key by lookup() and which value is retained by * rangeQuery(). * * @exception IllegalArgumentException * if a source is used more than once. * @exception IllegalArgumentException * unless all sources have the same indexUUID * @exception IllegalArgumentException * unless all sources support delete markers. */ public FusedView(final AbstractBTree[] srcs) { /* * Note: This has been abstracted and modified to NOT hold a hard * reference to the index segments in the view so that they may be * closed even while the view itself is open. This was done in an * attempt to reduce the memory demand associated with open index * segments. */ if(false) { // hard reference to each source in the view. sources = new HardRefSources(srcs); } else { // hard reference only to BTrees in the view. sources = new WeakRefSources(srcs); } } /** * Checks the sources to make sure that they all support delete markers * (required for views), all non-null, and all have the same index UUID. * * @param srcs * The sources for a view. */ static void checkSources(final AbstractBTree[] srcs) { if (srcs == null) throw new IllegalArgumentException("sources is null"); /* * @todo allow this as a degenerate case, or create a factory that * produces the appropriate view? */ if (srcs.length < 2) { throw new IllegalArgumentException( "At least two sources are required"); } for (int i = 0; i < srcs.length; i++) { if (srcs[i] == null) throw new IllegalArgumentException("Source null @ index=" + i); if (!srcs[i].getIndexMetadata().getDeleteMarkers()) { throw new IllegalArgumentException( "Source does not maintain delete markers @ index=" + i); } for (int j = 0; j < i; j++) { if (srcs[i] == srcs[j]) throw new IllegalArgumentException( "Source used more than once" ); if (!srcs[i].getIndexMetadata().getIndexUUID().equals( srcs[j].getIndexMetadata().getIndexUUID())) { throw new IllegalArgumentException( "Sources have different index UUIDs @ index=" + i ); } } } } @Override public IndexMetadata getIndexMetadata() { return getMutableBTree().getIndexMetadata(); } // public IBloomFilter getBloomFilter() { // // // double checked locking. // if (bloomFilter == null) { // // synchronized (this) { // if (noBloom) // return null; // for (AbstractBTree tree : getSources()) { // if (tree.getBloomFilter() == null) { // noBloom = true; // return null; // } // } // bloomFilter = new FusedBloomFilter(); // // } // // } // // return bloomFilter; // // } // // private volatile boolean noBloom = false; @Override public IBloomFilter getBloomFilter() { // double checked locking. if (bloomFilter == null) { synchronized (this) { bloomFilter = new FusedBloomFilter(); } } return bloomFilter; } private volatile IBloomFilter bloomFilter = null; @Override final public CounterSet getCounters() { final CounterSet counterSet = new CounterSet(); int i = 0; for(AbstractBTree t : sources) { counterSet.makePath("view[" + i + "]").attach(t.getCounters()); i++; } return counterSet; } /** * The counter for the first source. */ @Override public ICounter getCounter() { return getMutableBTree().getCounter(); } /** * {@inheritDoc} * <p> * Resolves the old value against the view and then directs the write to the * first of the sources specified to the ctor. */ @Override public byte[] insert(final byte[] key, final byte[] value) { final byte[] oldval = lookup(key); getMutableBTree().insert(key, value); return oldval; } /** * {@inheritDoc} * <p> * This case is a bit tricky. Since it is possible for the value stored * under a key to be null, we need to obtain the Tuple for the key from the * view. If the tuple is null or deleted, then we can do an unconditional * insert. Otherwise there is an entry under the key and we return the value * of the entry from the Tuple. Note that the value COULD be a null. */ @Override public byte[] putIfAbsent(final byte[] key, final byte[] value) { final Tuple tuple = lookup(key, getMutableBTree().getLookupTuple()); if (tuple == null || tuple.isDeletedVersion()) { /* * Interpret a deletion marker as "not found". */ // unconditional insert. getMutableBTree().insert(key, value); // nothing was in the index under that key. return null; } // return the pre-existing value under the key. return tuple.getValue(); } public Object insert(Object key, Object val) { key = getTupleSerializer().serializeKey(key); val = getTupleSerializer().serializeVal(val); final ITuple tuple = lookup((byte[]) key, getMutableBTree().getLookupTuple()); // direct the write to the first source. getMutableBTree().insert((byte[]) key, (byte[]) val); if (tuple == null || tuple.isDeletedVersion()) { /* * Either there was no entry under that key for any source or the * entry is already marked as deleted in the view. */ return null; } return tuple.getObject(); } /** * Resolves the old value against the view and then directs the write to the * first of the sources specified to the ctor. The remove is in fact treated * as writing a deleted marker into the index. */ public byte[] remove(final byte[] key) { /* * Slight optimization prevents remove() from writing on the index if * there is no entry under that key for any source (or if there is * already a deleted entry under that key). */ final Tuple tuple = lookup(key, getMutableBTree().getLookupTuple()); if (tuple == null || tuple.isDeletedVersion()) { /* * Either there was no entry under that key for any source or the * entry is already marked as deleted in the view so we are done. */ return null; } final byte[] oldval = tuple.getValue(); // remove from the 1st source. getMutableBTree().remove(key); return oldval; } public Object remove(Object key) { key = getTupleSerializer().serializeKey(key); /* * Slight optimization prevents remove() from writing on the index if * there is no entry under that key for any source (or if there is * already a deleted entry under that key). */ final Tuple tuple = lookup((byte[])key, getMutableBTree().getLookupTuple()); if (tuple == null || tuple.isDeletedVersion()) { /* * Either there was no entry under that key for any source or the * entry is already marked as deleted in the view so we are done. */ return null; } // remove from the 1st source. getMutableBTree().remove(key); return tuple.getObject(); } /** * {@inheritDoc} * <p> * Return the first value for the key in an ordered search of the trees in * the view. */ @Override final public byte[] lookup(final byte[] key) { final Tuple tuple = lookup(key, getMutableBTree().getLookupTuple()); if (tuple == null || tuple.isDeletedVersion()) { /* * Interpret a deletion marker as "not found". */ return null; } return tuple.getValue(); } @Override public Object lookup(Object key) { key = getTupleSerializer().serializeKey(key); final Tuple tuple = lookup((byte[]) key, getMutableBTree().getLookupTuple()); if (tuple == null || tuple.isDeletedVersion()) { /* * Interpret a deletion marker as "not found". */ return null; } return tuple.getObject(); } /** * Per {@link AbstractBTree#lookup(byte[], Tuple)} but0 processes the * {@link AbstractBTree}s in the view in their declared sequence and stops * when it finds the first index entry for the key, even it the entry is * marked as deleted for that key. * * @param key * The search key. * @param tuple * A tuple to be populated with data and metadata about the index * entry (required). * * @return <i>tuple</i> iff an index entry was found under that <i>key</i>. */ final public Tuple lookup(final byte[] key, final Tuple tuple) { return lookup(0, key, tuple); } /** * Core implementation processes the {@link AbstractBTree}s in the view in * their declared sequence and stops when it finds the first index entry for * the key, even it the entry is marked as deleted for that key. * * @param startIndex * The index of the first source to be read. This permits the * lookup operation to start at an index into the {@link #srcs} * other than zero. This is used by {@link IsolatedFusedView} to * read from just the groundState (everything except the * writeSet, which is the source at index zero(0)). * @param key * The search key. * @param tuple * A tuple to be populated with data and metadata about the index * entry (required). * * @return <i>tuple</i> iff an index entry was found under that <i>key</i>. */ final protected Tuple lookup(final int startIndex, final byte[] key, final Tuple tuple) { for(AbstractBTree t : sources) { if( t.lookup(key, tuple) == null) { // No match yet. continue; } return tuple; } // no match. return null; } /** * {@inheritDoc} * <p> * Processes the {@link AbstractBTree}s in the view in sequence and returns * true iff the first {@link AbstractBTree} with an index entry under the * key is non-deleted. */ @Override final public boolean contains(final byte[] key) { final Tuple tuple = lookup(key, getMutableBTree().getContainsTuple()); if (tuple == null || tuple.isDeletedVersion()) { /* * Interpret a deletion marker as "not found". */ return false; } return true; } @Override public boolean contains(Object key) { key = getTupleSerializer().serializeKey(key); return contains((byte[]) key); } private ITupleSerializer getTupleSerializer() { return getIndexMetadata().getTupleSerializer(); } /** * {@inheritDoc} * <p> * Returns the sum of the range count on each index in the view. This is the * maximum #of entries that could lie within that key range. However, the * actual number could be less if there are entries for the same key in more * than one source index. */ @Override final public long rangeCount() { return rangeCount(null/* fromKey */, null/* toKey */); } /** * {@inheritDoc} * <p> * Returns the sum of the range count on each index in the view. This is the * maximum #of entries that could lie within that key range. However, the * actual number could be less if there are entries for the same key in more * than one source index. * * @todo this could be done using concurrent threads. */ @Override final public long rangeCount(byte[] fromKey, byte[] toKey) { if (fromKey == null || toKey == null) { /* * Note: When an index partition is split, the new index partitions * will initially use the same source index segments as the original * index partition. Therefore we MUST impose an explicit constraint * on the fromKey / toKey if none is given so that we do not read * tuples lying outside of the index partition boundaries! However, * if there is only a BTree in the view then the partition metadata * might not be defined, so we check for that first. * * TODO Review this assertion and optimize range count which span a * shard view. See * http://sourceforge.net/apps/trac/bigdata/ticket/470 (Optimize * range counts on cluster) */ final LocalPartitionMetadata pmd = getIndexMetadata() .getPartitionMetadata(); if (pmd != null) { if (fromKey == null) { fromKey = pmd.getLeftSeparatorKey(); } if (toKey == null) { toKey = pmd.getRightSeparatorKey(); } } } long count = 0; for(AbstractBTree t : sources) { final long inc = t.rangeCount(fromKey, toKey); if (count + inc < count) { log.warn(ERR_RANGE_COUNT_EXCEEDS_MAX_LONG); return Long.MAX_VALUE; } count += inc; } return count; } /** * {@inheritDoc} * <p> * The exact range count is obtained using a key-range scan over the view. */ @Override final public long rangeCountExact(byte[] fromKey, byte[] toKey) { if (fromKey == null || toKey == null) { /* * Note: When an index partition is split, the new index partitions * will initially use the same source index segments as the original * index partition. Therefore we MUST impose an explicit constraint * on the fromKey / toKey if none is given so that we do not read * tuples lying outside of the index partition boundaries! However, * if there is only a BTree in the view then the partition metadata * might not be defined, so we check for that first. */ final LocalPartitionMetadata pmd = getIndexMetadata() .getPartitionMetadata(); if (pmd != null) { if (fromKey == null) { fromKey = pmd.getLeftSeparatorKey(); } if (toKey == null) { toKey = pmd.getRightSeparatorKey(); } } } final ITupleIterator itr = rangeIterator(fromKey, toKey, 0/* capacity */, 0/* flags */, null/* filter */); long n = 0; while (itr.hasNext()) { itr.next(); if (n == Long.MAX_VALUE) throw new RuntimeException(ERR_RANGE_COUNT_EXCEEDS_MAX_LONG); n++; } return n; } /** * {@inheritDoc} * <p> * An exact range count that includes any deleted tuples. This is obtained * using a key-range scan over the view. * * @see #rangeCountExact(byte[], byte[]) */ @Override public long rangeCountExactWithDeleted(byte[] fromKey, byte[] toKey) { if (fromKey == null || toKey == null) { /* * Note: When an index partition is split, the new index partitions * will initially use the same source index segments as the original * index partition. Therefore we MUST impose an explicit constraint * on the fromKey / toKey if none is given so that we do not read * tuples lying outside of the index partition boundaries! However, * if there is only a BTree in the view then the partition metadata * might not be defined, so we check for that first. */ final LocalPartitionMetadata pmd = getIndexMetadata() .getPartitionMetadata(); if (pmd != null) { if (fromKey == null) { fromKey = pmd.getLeftSeparatorKey(); } if (toKey == null) { toKey = pmd.getRightSeparatorKey(); } } } // set the DELETED flag so we also see the deleted tuples. final Iterator itr = rangeIterator(fromKey, toKey, 0/* capacity */, IRangeQuery.DELETED/* flags */, null/* filter */); long n = 0L; while (itr.hasNext()) { itr.next(); if (n == Long.MAX_VALUE) throw new RuntimeException(ERR_RANGE_COUNT_EXCEEDS_MAX_LONG); n++; } return n; } @Override public ITupleIterator rangeIterator() { return rangeIterator(null, null); } /** * {@inheritDoc} * <p> * Returns an iterator that visits the distinct entries. When an entry * appears in more than one index, the entry is chosen based on the order * in which the indices were declared to the constructor. */ @Override final public ITupleIterator rangeIterator(final byte[] fromKey, final byte[] toKey) { return rangeIterator(fromKey, toKey, 0/* capacity */, DEFAULT/* flags */, null/* filter */); } /** * <p> * Core implementation. * </p> * <p> * Note: The {@link FusedView}'s iterator first obtains an ordered array of * iterators for each of the source {@link AbstractBTree}s. The <i>filter</i> * is NOT passed through to these source iterators. Instead, an * {@link FusedTupleIterator} is obtained and the filter is applied to that * iterator. This means that filters always see a fused representation of * the source iterators. * </p> * <p> * Note: This implementation supports {@link IRangeQuery#REVERSE}. This may * be used to locate the {@link ITuple} before a specified key, which is a * requirement for several aspects of the overall architecture including * atomic append of file blocks, locating an index partition in the metadata * index, and finding the last member of a set or map. * </p> * <p> * Note: When the {@link IRangeQuery#CURSOR} flag is specified, it is passed * through and an {@link ITupleCursor} is obtained for each source * {@link AbstractBTree}. A {@link FusedTupleCursor} is then obtained which * implements the {@link ITupleCursor} extensions. * </p> */ @Override @SuppressWarnings("unchecked") public ITupleIterator rangeIterator(// byte[] fromKey,// byte[] toKey, // final int capacity, // final int flags,// final IFilter filter// ) { if (fromKey == null || toKey == null) { /* * Note: When an index partition is split, the new index partitions * will initially use the same source index segments as the original * index partition. Therefore we MUST impose an explicit constraint * on the fromKey / toKey if none is given so that we do not read * tuples lying outside of the index partition boundaries! However, * if there is only a BTree in the view then the partition metadata * might not be defined, so we check for that first. */ final LocalPartitionMetadata pmd = getIndexMetadata() .getPartitionMetadata(); if (pmd != null) { if (fromKey == null) { fromKey = pmd.getLeftSeparatorKey(); } if (toKey == null) { toKey = pmd.getRightSeparatorKey(); } } } // reverse scan? final boolean reverseScan = (flags & REVERSE) != 0; // cursor requested? final boolean cursorMode = (flags & CURSOR) != 0; // read only? final boolean readOnly = ((flags & READONLY) != 0); // iff the aggregate iterator should visit deleted entries. final boolean deleted = (flags & DELETED) != 0; // removeAll? final boolean removeAll = (flags & REMOVEALL) != 0; if (readOnly && removeAll) { // REMOVEALL is not compatible with READONLY. throw new IllegalArgumentException(); } final int n = sources.getSourceCount(); if (log.isInfoEnabled()) log.info("nsrcs=" + n + ", flags=" + flags + ", readOnly=" + readOnly + ", deleted=" + deleted + ", reverseScan=" + reverseScan); /* * Note: We request KEYS since we need to compare the keys in order to * decide which tuple to return next. * * Note: We request DELETED so that we will see deleted entries. This is * necessary in order for processing to stop at the first entry for a * give key regardless of whether it is deleted or not. If the caller * does not want to see the deleted tuples, then they are silently * dropped from the aggregate iterator. * * Note: The [filter] is NOT passed through to the source iterators. * This is because the filter must be applied to the aggregate iterator * in order to operate on the fused view. * * Note: The REVERSE flag is NOT passed through to the source iterators. * It is handled below by layering on a filter. * * Note: The REMOVEALL flag is NOT passed through to the source * iterators. It is handled below by laying on a filter. */ final int sourceFlags = (// (flags | KEYS | DELETED)// | (reverseScan || removeAll ? CURSOR : 0) // )// & (~REMOVEALL)// turn off & (~REVERSE)// turn off ; /* * The source iterator produces a fused view of the source indices. We * then layer the filter(s) over the fused view iterator. A subclass is * used if CURSOR support is required for the fused view. */ ITupleIterator src; if (cursorMode || removeAll || reverseScan) { /* * CURSOR was specified for the aggregate iterator or is required to * support REMOVEALL. */ final ITupleCursor[] itrs = new ITupleCursor[n]; int i = 0; for(AbstractBTree t : sources) { itrs[i++] = (ITupleCursor) t.rangeIterator(fromKey, toKey, capacity, sourceFlags, null/* filter */); } // Note: aggregate source implements ITupleCursor. src = new FusedTupleCursor(flags, deleted, itrs, readOnly?new ReadOnlyIndex(this):this); } else { /* * CURSOR was neither specified nor required for the aggregate * iterator. * * Note: If reverse was specified, then we pass it into the source * iterators for the source B+Trees. The resulting fused iterator * will already have reversal traversal semantics so we do not need * to layer a Reverserator on top. */ final ITupleIterator[] itrs = new ITupleIterator[n]; int i = 0; for (AbstractBTree t : sources) { itrs[i++] = t.rangeIterator(fromKey, toKey, capacity, sourceFlags, null/* filter */); } src = new FusedTupleIterator(flags, deleted, itrs); } if (reverseScan) { /* * Reverse scan iterator. * * Note: The reverse scan MUST be layered directly over the * ITupleCursor. Most critically, REMOVEALL combined with a REVERSE * scan needs to process the tuples in reverse index order and then * delete them as it goes. */ src = new Reverserator((ITupleCursor) src); } if (filter != null) { /* * Apply the optional filter. * * Note: This needs to be after the reverse scan and before * REMOVEALL (those are the assumptions for the flags). */ src = new WrappedTupleIterator(filter .filter(src, null/* context */)); } if ((flags & REMOVEALL) != 0) { assertNotReadOnly(); /* * Note: This iterator removes each tuple that it visits from the * source iterator. */ src = new TupleRemover() { private static final long serialVersionUID = 1L; @Override protected boolean remove(ITuple e) { // remove all visited tuples. return true; } }.filterOnce(src, null/* context */); } return src; } @Override final public <T> T submit(final byte[] key, final ISimpleIndexProcedure<T> proc) { return proc.apply(this); } @Override @SuppressWarnings("unchecked") final public void submit(byte[] fromKey, byte[] toKey, final IKeyRangeIndexProcedure proc, final IResultHandler handler) { if (fromKey == null) { /* * Note: When an index partition is split, the new index partitions * will initially use the same source index segments as the original * index partition. Therefore we MUST impose an explicit constraint * on the fromKey/toKey if none is given so that we do not read * tuples lying outside of the index partition boundaries! */ fromKey = getIndexMetadata().getPartitionMetadata() .getLeftSeparatorKey(); } if (toKey == null) { /* * Note: When an index partition is split, the new index partitions * will initially use the same source index segments as the original * index partition. Therefore we MUST impose an explicit constraint * on the fromKey/toKey if none is given so that we do not read * tuples lying outside of the index partition boundaries! */ toKey = getIndexMetadata().getPartitionMetadata() .getRightSeparatorKey(); } final Object result = proc.apply(this); if (handler != null) { handler.aggregate(result, new Split(null,0,0)); } } @Override @SuppressWarnings("unchecked") public void submit(final int fromIndex, final int toIndex, final byte[][] keys, final byte[][] vals, final AbstractKeyArrayIndexProcedureConstructor ctor, final IResultHandler aggregator) { final Object result = ctor.newInstance(this, fromIndex, toIndex, keys, vals).apply(this); if (aggregator != null) { aggregator.aggregate(result, new Split(null, fromIndex, toIndex)); } } /** * Inner class providing a fused view of the optional bloom filters * associated with each of the source indices. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> */ protected class FusedBloomFilter implements IBloomFilter { /** * Unsupported operation. * * @throws UnsupportedOperationException * always. */ public boolean add(byte[] key) { throw new UnsupportedOperationException(); } /** * Applies the {@link IBloomFilter} for each source index in turn and * returns <code>true</code> if ANY of the component index filters * return <code>true</code> (if any filters say that their index has * data for that key then you need to read the index). If a filter does * not exist (or has been disabled) for a given component index then the * code treats the filter as always reporting <code>true</code> (that * is, forcing us to check the index). So a source component index * without a bloom filter or an index with a disabled bloom filter acts * as its filter has a high false positive rate, however the test is a * NOP so it is cheap. */ public boolean contains(final byte[] key) { final AbstractBTree[] srcs = getSources(); for (int i = 0; i < srcs.length; i++) { final AbstractBTree src = srcs[i]; final IBloomFilter filter = src.getBloomFilter(); if ((i == 0 || i == 1) && srcs.length > i + 1 && src instanceof BTree && srcs[i + 1].getBloomFilter() != null) { /* * Do a real point test when we have a FusedView and the 1st * or 2nd component of the view is a BTree and there are * additional components in the view and they have a bloom * filter enabled. This covers the case where the BTree is * absorbing writes (either isolated or unisolated, which is * why we allow the 1st or 2nd component) and there are * additional views, which presumably are IndexSegments. The * reasoning is that the BTree contains test will be * relatively fast and we still get to apply the bloom * filters on the index segments, thereby avoiding a disk * hit in those cases where the bloom filter for the mutable * index on a ManagedJournal has been turned off but the * index segments have perfect bloom filters that we still * want to leverage. Testing the BTree instance here might * touch the disk, but it will force the node path into the * cache so if we do get a 'true' response from one of the * bloom filters and have to test the indices it will not * add additional disk hits. */ if(src.contains(key)) { // proven (but interpreted as probable hit). return true; } // test the next source index. continue; } if (filter == null || filter.contains(key)) { /* * Either no filter, a disabled filter, or the filter exists * and reports that it has seen the key. At the worst, this * is a false positive and we will be forced to check the * index. */ return true; } } // proven to not be in the index. return false; } /** * This implementation notifies the bloom filter for the first source * index (if it exists). Normally false positives will be reported * directly to the specific bloom filter instance by the contains() or * lookup() method for that index. However, the * {@link AccessPath} also tests the bloom filter and needs a * means to report false positives. It should be the only one that calls * this method on this implementation class. */ public void falsePos() { final IBloomFilter filter = getMutableBTree().getBloomFilter(); if (filter != null) { filter.falsePos(); } } } }