/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Aug 16, 2010
 */

package com.bigdata.bop.ap;

import it.unimi.dsi.bits.BitVector;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.Callable;

import com.bigdata.bop.AbstractAccessPathOp;
import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContextBase;
import com.bigdata.bop.IPredicate;
import com.bigdata.btree.AbstractBTree;
import com.bigdata.btree.ILeafCursor;
import com.bigdata.btree.ILinearList;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleCursor;
import com.bigdata.btree.filter.Advancer;
import com.bigdata.btree.view.FusedView;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.AccessPath;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.rule.IAccessPathExpander;
import com.bigdata.striterator.IKeyOrder;
import com.bigdata.util.Bytes;

import cutthecrap.utils.striterators.IFilter;

/**
 * Sampling operator for the {@link IAccessPath} implied by an
 * {@link IPredicate}.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id: AbstractSampleIndex.java 3672 2010-09-28 23:39:42Z thompsonbry
 *          $
 * @param <E>
 *            The generic type of the elements materialized from that index.
 * 
 * @todo This is a basic operator which is designed to support adaptive query
 *       optimization. However, there are a lot of possible semantics for
 *       sampling, including: uniform distribution, random distribution, tuple
 *       at a time versus clustered (sampling whole leaves), adaptive sampling
 *       until the sample reflects some statistical property of the underlying
 *       population, etc. Support for different kinds of sampling could be
 *       added using appropriate annotations.
 */
public class SampleIndex<E> extends AbstractAccessPathOp<E> {

    /**
     * 
     */
    private static final long serialVersionUID = 1L;

    /**
     * Typesafe enumeration of different kinds of index sampling strategies.
     * 
     * @todo It is much more efficient to take clusters of samples when you can
     *       accept the bias. Taking a clustered sample really requires knowing
     *       where the leaf boundaries are in the index, e.g., using
     *       {@link ILeafCursor}. Taking all tuples from a few leaves in each
     *       sample might produce a faster estimation of the correlation when
     *       sampling join paths.
     */
    public static enum SampleType {

        /**
         * Samples are taken at evenly spaced offsets. This produces a sample
         * without any random effects.
         * Re-sampling an index having the same data with the same key-range
         * and limit will always return the same results. This is useful to
         * make unit tests repeatable.
         */
        EVEN,

        /**
         * Sample offsets are computed randomly.
         */
        RANDOM,

        /**
         * The samples will be dense and may have a front bias. This mode
         * emphasizes the locality of the samples on the index pages and
         * minimizes the IO associated with sampling.
         */
        DENSE;

    }

    /**
     * Known annotations.
     */
    public interface Annotations extends BOp.Annotations {

        /**
         * The sample limit (default {@value #DEFAULT_LIMIT}).
         */
        String LIMIT = (SampleIndex.class.getName() + ".limit").intern();

        int DEFAULT_LIMIT = 100;

        /**
         * The random number generator seed -or- ZERO (0L) for a random seed
         * (default {@value #DEFAULT_SEED}). A non-zero value may be used to
         * create a repeatable sample.
         */
        String SEED = (SampleIndex.class.getName() + ".seed").intern();

        long DEFAULT_SEED = 0L;

        /**
         * The {@link IPredicate} describing the access path to be sampled
         * (required).
         */
        String PREDICATE = (SampleIndex.class.getName() + ".predicate")
                .intern();

        /**
         * The type of sample to take (default {@value #DEFAULT_SAMPLE_TYPE}).
         */
        String SAMPLE_TYPE = (SampleIndex.class.getName() + ".sampleType")
                .intern();

        String DEFAULT_SAMPLE_TYPE = SampleType.RANDOM.name();

    }

    public SampleIndex(SampleIndex<E> op) {
        super(op);
    }

    public SampleIndex(BOp[] args, Map<String, Object> annotations) {
        super(args, annotations);
    }

    /**
     * @see Annotations#LIMIT
     */
    public int limit() {

        return getProperty(Annotations.LIMIT, Annotations.DEFAULT_LIMIT);

    }

    /**
     * @see Annotations#SEED
     */
    public long seed() {

        return getProperty(Annotations.SEED, Annotations.DEFAULT_SEED);

    }

    /**
     * @see Annotations#SAMPLE_TYPE
     */
    public SampleType getSampleType() {

        return SampleType.valueOf(getProperty(Annotations.SAMPLE_TYPE,
                Annotations.DEFAULT_SAMPLE_TYPE));

    }

    @SuppressWarnings("unchecked")
    public IPredicate<E> getPredicate() {

        return (IPredicate<E>) getRequiredProperty(Annotations.PREDICATE);

    }

    /**
     * Return a sample from the access path associated with the
     * {@link Annotations#PREDICATE}.
     */
    public E[] eval(final BOpContextBase context) {

        try {
            return new SampleTask(context).call();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

    }
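    /**
     * Illustrative sketch (for exposition only, not part of the operator
     * contract): one way a caller might configure and evaluate this sampling
     * operator. The predicate and evaluation context are assumed to be
     * supplied by the caller; the annotation values shown (a limit of 50, a
     * fixed seed, {@link SampleType#EVEN}) are arbitrary.
     */
    private static <T> T[] exampleEvenSample(final IPredicate<T> pred,
            final BOpContextBase context) {

        final Map<String, Object> anns = new java.util.LinkedHashMap<String, Object>();

        anns.put(Annotations.PREDICATE, pred);
        anns.put(Annotations.LIMIT, 50); // sample at most 50 tuples.
        anns.put(Annotations.SEED, 217L); // non-zero seed => repeatable sample.
        anns.put(Annotations.SAMPLE_TYPE, SampleType.EVEN.name());

        final SampleIndex<T> op = new SampleIndex<T>(new BOp[] {}, anns);

        // Materialize the sample (null if the access path was empty).
        return op.eval(context);

    }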
    /**
     * Sample an {@link IAccessPath}.
     * 
     * FIXME This needs to handle each of the following conditions:
     * <p>
     * Timestamp {read-historical, read-committed, read-write tx, unisolated}<br>
     * Index view {standalone, partitioned, global view of partitioned}<br>
     * 
     * @todo The general approach uses the {@link ILinearList} interface to
     *       take evenly distributed or randomly distributed samples from the
     *       underlying index. This is done using an {@link IFilter} which is
     *       evaluated local to the index. This works whether or not the access
     *       path is using a partitioned view of the index.
     *       <p>
     *       When sampling an index shard the {@link ILinearList} API is not
     *       defined for the {@link FusedView}. Since this sampling operator
     *       exists for the purposes of estimating the cardinality of an access
     *       path, we can dispense with the fused view and collect a number of
     *       samples from each component of that view which is proportional to
     *       the range count of that component divided by the range count of
     *       the view. This may cause tuples which have since been deleted to
     *       become visible, but this should not cause problems when estimating
     *       the cardinality of a join path as long as we always report the
     *       actual tuples from the fused view in the case where the desired
     *       sample size is LTE the estimated range count of the access path.
     * 
     * @todo Better performance could be realized by accepting all tuples in a
     *       leaf. This requires a sensitivity to the leaf boundaries which
     *       might be obtained with an {@link ITupleCursor} extension interface
     *       for local indices or with the {@link ILeafCursor} interface if
     *       that can be exposed from a sufficiently low level
     *       {@link ITupleCursor} implementation. However, when there are
     *       further constraints layered onto the access path by the
     *       {@link IPredicate} it may be that such clustered (leaf at once)
     *       sampling is not practical.
     * 
     * @todo When sampling a global view of a partitioned index, we should
     *       focus the sample on a subset of the index partitions in order to
     *       "cluster" the effort. This can of course introduce bias. However,
     *       if there are a lot of index partitions then the sample will of
     *       necessity be very small in proportion to the data volume and the
     *       opportunity for bias will be correspondingly large.
     * 
     * @todo If there is an {@link IAccessPathExpander} then
     */
    private class SampleTask implements Callable<E[]> {

        private final BOpContextBase context;

        SampleTask(final BOpContextBase context) {

            this.context = context;

        }

        /** Return a sample from the access path. */
        public E[] call() throws Exception {

            return sample(limit(), getSampleType(), getPredicate())
                    .getSample();

        }

        /**
         * Return a sample from the access path.
         * 
         * @param limit
         *            The maximum #of samples to take.
         * 
         * @return The sample.
         */
        public AccessPathSample<E> sample(final int limit,
                final SampleType sampleType, IPredicate<E> predicate) {

            final IRelation<E> relation = context.getRelation(predicate);

            // @todo assumes raw AP.
            final AccessPath<E> accessPath = (AccessPath<E>) context
                    .getAccessPath(relation, predicate);

            final long rangeCount = accessPath.rangeCount(false/* exact */);

            if (limit >= rangeCount) {

                /*
                 * The sample will contain everything in the access path.
                 */
                return new AccessPathSample<E>(limit, accessPath);

            }

            /*
             * Add the CURSOR and PARALLEL flags to the predicate.
             * 
             * @todo turn off REVERSE if specified.
             */
            final int flags = predicate.getProperty(
                    IPredicate.Annotations.FLAGS,
                    IPredicate.Annotations.DEFAULT_FLAGS)
                    | IRangeQuery.CURSOR | IRangeQuery.PARALLEL;

            predicate = (IPredicate<E>) predicate.setProperty(
                    IPredicate.Annotations.FLAGS, flags);

            /*
             * Add advancer to collect sample.
             */
            final Advancer<E> advancer;
            switch (sampleType) {
            case EVEN:
                advancer = new EvenSampleAdvancer<E>(// rangeCount,
                        limit, accessPath.getFromKey(), accessPath.getToKey());
                break;
            case RANDOM:
                advancer = new RandomSampleAdvancer<E>(// rangeCount,
                        seed(), limit, accessPath.getFromKey(), accessPath
                                .getToKey());
                break;
            case DENSE:
                advancer = new DenseSampleAdvancer<E>();
                break;
            default:
                throw new UnsupportedOperationException("SampleType="
                        + sampleType);
            }

            predicate = ((Predicate<E>) predicate)
                    .addIndexLocalFilter(advancer);

            return new AccessPathSample<E>(limit, context.getAccessPath(
                    relation, predicate));

        }

    }
    /**
     * Dense samples in key order (simple index scan).
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     * @param <E>
     */
    private static class DenseSampleAdvancer<E> extends Advancer<E> {

        private static final long serialVersionUID = 1L;

        @Override
        protected void advance(final ITuple<E> tuple) {
            // NOP
        }

    }

    /**
     * An advancer pattern which is designed to take evenly distributed samples
     * from an index. The caller specifies the #of tuples to be sampled. This
     * class estimates the range count of the access path and then computes the
     * #of tuples to be skipped after each tuple visited.
     * <p>
     * Note: This can fail to gather the desired number of samples if
     * additional filters are applied which further restrict the elements
     * selected by the predicate. However, it will still faithfully represent
     * the expected cardinality of the sampled access path (tuples tested).
     * 
     * @author thompsonbry@users.sourceforge.net
     * 
     * @param <E>
     *            The generic type of the elements visited by that access path.
     */
    private static class EvenSampleAdvancer<E> extends Advancer<E> {

        private static final long serialVersionUID = 1L;

        /** The desired total limit on the sample. */
        private final int limit;

        private final byte[] /* fromKey, */toKey;

        /*
         * Transient data. This gets initialized when we visit the first tuple.
         */

        /** The #of tuples to be skipped after every tuple visited. */
        private transient long skipCount;

        /** The #of tuples accepted so far. */
        private transient int nread = 0;

        /** The inclusive lower bound of the first tuple actually visited. */
        private transient long fromIndex;

        /** The exclusive upper bound of the last tuple which could be visited. */
        private transient long toIndex;

        /**
         * 
         * @param limit
         *            The #of samples to visit.
         */
        public EvenSampleAdvancer(final int limit, final byte[] fromKey,
                final byte[] toKey) {

            this.limit = limit;
            this.toKey = toKey;

        }

        @Override
        protected void advance(final ITuple<E> tuple) {

            final AbstractBTree ndx = (AbstractBTree) src.getIndex();

            final long currentIndex = ndx.indexOf(tuple.getKey());

            if (nread == 0) {

                // inclusive lower bound.
                fromIndex = currentIndex;

                // exclusive upper bound.
                toIndex = toKey == null ? ndx.getEntryCount() : ndx
                        .indexOf(toKey);

                if (toIndex < 0) {
                    // convert insert position to index.
                    toIndex = -toIndex + 1;
                }

                final long rangeCount = (toIndex - fromIndex);

                skipCount = Math.max(1L, rangeCount / limit);

                // minus one since src.next() already consumed one tuple.
                skipCount -= 1;

                // System.err.println("limit=" + limit + ", rangeCount="
                // + rangeCount + ", skipCount=" + skipCount);

            }

            nread++;

            if (skipCount > 0) {

                /*
                 * If the skip count is positive, then skip over N tuples.
                 */
                final long nextIndex = Math.min(ndx.getEntryCount() - 1,
                        currentIndex + skipCount);

                src.seek(ndx.keyAt(nextIndex));

            }

        }

    } // class EvenSampleAdvancer
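    /*
     * Worked example (illustrative): for EvenSampleAdvancer with an estimated
     * rangeCount of 1,000,000 tuples and limit = 100, the first visited tuple
     * yields skipCount = max(1, 1000000 / 100) - 1 = 9999, so the cursor is
     * advanced past 9,999 tuples after each one visited and roughly every
     * 10,000th tuple in the key range is reported.
     */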
    /**
     * An advancer pattern which is designed to take randomly distributed
     * samples from an index. The caller specifies the #of tuples to be
     * sampled. This class estimates the range count of the access path and
     * then computes a set of random offsets into the access path from which it
     * will collect the desired #of samples.
     * <p>
     * Note: This can fail to gather the desired number of samples if
     * additional filters are applied which further restrict the elements
     * selected by the predicate. However, it will still faithfully represent
     * the expected cardinality of the sampled access path (tuples tested).
     * 
     * @author thompsonbry@users.sourceforge.net
     * 
     * @param <E>
     *            The generic type of the elements visited by that access path.
     */
    private static class RandomSampleAdvancer<E> extends Advancer<E> {

        private static final long serialVersionUID = 1L;

        /** The random number generator seed. */
        private final long seed;

        /** The desired total limit on the sample. */
        private final int limit;

        private final byte[] fromKey, toKey;

        /*
         * Transient data. This gets initialized when we visit the first tuple.
         */

        /** The offset of each tuple to be sampled. */
        private transient long[] offsets;

        /** The #of tuples accepted so far. */
        private transient int nread = 0;

        /** The inclusive lower bound of the first tuple actually visited. */
        private transient long fromIndex;

        /** The exclusive upper bound of the last tuple which could be visited. */
        private transient long toIndex;

        /**
         * 
         * @param limit
         *            The #of samples to visit.
         */
        public RandomSampleAdvancer(final long seed, final int limit,
                final byte[] fromKey, final byte[] toKey) {

            this.seed = seed;
            this.limit = limit;
            this.fromKey = fromKey;
            this.toKey = toKey;

        }

        @Override
        protected boolean init() {

            final AbstractBTree ndx = (AbstractBTree) src.getIndex();

            // inclusive lower bound.
            fromIndex = fromKey == null ? 0 : ndx.indexOf(fromKey);

            if (fromIndex < 0) {
                // convert insert position to index.
                fromIndex = -fromIndex + 1;
            }

            // exclusive upper bound.
            toIndex = toKey == null ? ndx.getEntryCount() : ndx.indexOf(toKey);

            if (toIndex < 0) {
                // convert insert position to index.
                toIndex = -toIndex + 1;
            }

            // get offsets to be sampled.
            offsets = new SmartOffsetSampler().getOffsets(seed, limit,
                    fromIndex, toIndex);

            // Skip to the first tuple.
            src.seek(ndx.keyAt(offsets[0]));

            return true;

        }

        @Override
        protected void advance(final ITuple<E> tuple) {

            final AbstractBTree ndx = (AbstractBTree) src.getIndex();

            if (nread < offsets.length - 1) {

                /*
                 * Skip to the next tuple.
                 */
                final long nextIndex = offsets[nread];

                // System.err.println("limit=" + limit + ", rangeCount="
                // + (toIndex - fromIndex) + ", fromIndex=" + fromIndex
                // + ", toIndex=" + toIndex + ", currentIndex="
                // + currentIndex + ", nextIndex=" + nextIndex);

                src.seek(ndx.keyAt(nextIndex));

            }

            nread++;

        }

    } // class RandomSampleAdvancer

    /**
     * A sample from an access path.
     * 
     * @param <E>
     *            The generic type of the elements visited by that access path.
     * 
     * @author thompsonbry@users.sourceforge.net
     */
    public static class AccessPathSample<E> implements Serializable {

        private static final long serialVersionUID = 1L;

        private final IPredicate<E> pred;

        private final IKeyOrder<E> keyOrder;

        private final int limit;

        private final E[] sample;

        /**
         * Constructor populates the sample using the caller's
         * {@link IAccessPath#iterator()}. The caller is responsible for
         * setting up the {@link IAccessPath} such that it provides an
         * efficient sample of the access path with the appropriate
         * constraints.
         * 
         * @param limit
         * @param accessPath
         */
        private AccessPathSample(final int limit,
                final IAccessPath<E> accessPath) {

            if (limit <= 0)
                throw new IllegalArgumentException();

            if (accessPath == null)
                throw new IllegalArgumentException();

            this.pred = accessPath.getPredicate();

            this.keyOrder = accessPath.getKeyOrder();

            this.limit = limit;

            // drain the access path iterator.
            final ArrayList<E> tmp = new ArrayList<E>(limit);

            int nsamples = 0;

            final Iterator<E> src = accessPath.iterator(0L/* offset */, limit,
                    limit/* capacity */);

            while (src.hasNext() && nsamples < limit) {

                tmp.add(src.next());

                nsamples++;

            }

            // convert to an array of the appropriate type.
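            // (Generic array creation "new E[n]" is not allowed in Java, so
            // the runtime component type is taken from the first sampled
            // element via reflection.)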
            sample = tmp.isEmpty() ? null : tmp
                    .toArray((E[]) java.lang.reflect.Array.newInstance(tmp
                            .get(0).getClass(), tmp.size()));

        }

        public IPredicate<E> getPredicate() {
            return pred;
        }

        public boolean isEmpty() {
            return sample == null;
        }

        public int sampleSize() {
            return sample == null ? 0 : sample.length;
        }

        public int limit() {
            return limit;
        }

        /**
         * The sample.
         * 
         * @return The sample -or- <code>null</code> if the sample was empty.
         */
        public E[] getSample() {
            return sample;
        }

    } // AccessPathSample

    /**
     * Interface for obtaining an array of tuple offsets to be sampled.
     * 
     * @author thompsonbry
     */
    public interface IOffsetSampler {

        /**
         * Return an array of tuple indices which may be used to sample a key
         * range of some index.
         * <p>
         * Note: The caller must stop when it runs out of offsets, not when the
         * limit is satisfied, as there will be fewer offsets returned when the
         * half open range is smaller than the limit.
         * 
         * @param seed
         *            The seed for the random number generator -or- ZERO (0L)
         *            for a random seed. A non-zero value may be used to create
         *            a repeatable sample.
         * @param limit
         *            The maximum #of tuples to sample.
         * @param fromIndex
         *            The inclusive lower bound.
         * @param toIndex
         *            The exclusive upper bound.
         * 
         * @return An array of at most <i>limit</i> offsets into the index. The
         *         offsets will lie in the half open range [fromIndex,
         *         toIndex). The elements of the array will be in ascending
         *         order. No offsets will be repeated.
         * 
         * @throws IllegalArgumentException
         *             if <i>limit</i> is non-positive.
         * @throws IllegalArgumentException
         *             if <i>fromIndex</i> is negative.
         * @throws IllegalArgumentException
         *             if <i>toIndex</i> is negative.
         * @throws IllegalArgumentException
         *             unless <i>toIndex</i> is GT <i>fromIndex</i>.
         */
        long[] getOffsets(long seed, int limit, long fromIndex, long toIndex);

    }

    /**
     * A smart implementation which uses whichever strategy is most efficient
     * for the limit and key range to be sampled.
     * 
     * @author thompsonbry
     */
    public static class SmartOffsetSampler implements IOffsetSampler {

        /**
         * {@inheritDoc}
         */
        public long[] getOffsets(final long seed, int limit,
                final long fromIndex, final long toIndex) {

            if (limit < 1)
                throw new IllegalArgumentException();

            if (fromIndex < 0)
                throw new IllegalArgumentException();

            if (toIndex < 0)
                throw new IllegalArgumentException();

            if (toIndex <= fromIndex)
                throw new IllegalArgumentException();

            final long rangeCount = (toIndex - fromIndex);

            if (limit > rangeCount) {
                /*
                 * Note: the cast is valid since limit is an int32 and
                 * rangeCount LT limit, so rangeCount may be cast to int32.
                 */
                limit = (int) rangeCount;
            }

            if (limit == rangeCount) {

                // Visit everything.
                return new EntireRangeOffsetSampler().getOffsets(seed, limit,
                        fromIndex, toIndex);

            }

            /*
             * Random offsets visiting a subset of the key range using a
             * selection without replacement pattern (the same tuple is never
             * visited twice).
             * 
             * FIXME When the limit approaches the range count and the range
             * count is large (too large for a bit vector or acceptance set
             * approach), then we are better off creating a hash set of offsets
             * NOT to be visited and then just choosing (rangeCount-limit)
             * offsets to reject. This will be less expensive than computing
             * the acceptance set directly. However, to really benefit from the
             * smaller memory profile, we would also need to wrap that with an
             * iterator pattern so the smaller memory representation could be
             * of use when the offset[] is applied (e.g., modify the
             * IOffsetSampler interface to be an iterator with various ctor
             * parameters rather than returning an array as we do today).
             */
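            /*
             * For example (illustrating the FIXME above): with rangeCount =
             * 1,000,000 and limit = 999,900 it would be cheaper to choose the
             * 100 offsets to reject than to build an acceptance set of 999,900
             * accepted offsets.
             */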
            // FIXME BitVectorOffsetSampler is broken.
            if (false && rangeCount < Bytes.kilobyte32 * 8) {

                // NB: 32k range count uses a 4k bit vector.
                return new BitVectorOffsetSampler().getOffsets(seed, limit,
                        fromIndex, toIndex);

            }

            /*
             * When limit is small (or significantly smaller than the
             * rangeCount), then we are much better off creating a hash set of
             * the offsets which have been accepted.
             * 
             * Good unless [limit] is very large.
             */
            return new AcceptanceSetOffsetSampler().getOffsets(seed, limit,
                    fromIndex, toIndex);

        }

    }

    /**
     * Returns all offsets in the half-open range, but may only be used when
     * the limit is GTE the range count.
     */
    static public class EntireRangeOffsetSampler implements IOffsetSampler {

        /**
         * {@inheritDoc}
         * 
         * @throws UnsupportedOperationException
         *             if <i>limit!=rangeCount</i> (after adjusting for limits
         *             greater than the rangeCount).
         */
        public long[] getOffsets(final long seed, int limit,
                final long fromIndex, final long toIndex) {

            if (limit < 1)
                throw new IllegalArgumentException();

            if (fromIndex < 0)
                throw new IllegalArgumentException();

            if (toIndex < 0)
                throw new IllegalArgumentException();

            if (toIndex <= fromIndex)
                throw new IllegalArgumentException();

            final long rangeCount = (toIndex - fromIndex);

            if (limit > rangeCount) {
                /*
                 * Note: the cast is valid since limit is an int32 and
                 * rangeCount LT limit, so rangeCount may be cast to int32.
                 */
                limit = (int) rangeCount;
            }

            if (limit != rangeCount)
                throw new UnsupportedOperationException();

            // offsets of tuples to visit.
            final long[] offsets = new long[limit];

            for (int i = 0; i < limit; i++) {

                offsets[i] = fromIndex + i;

            }

            return offsets;

        }

    }

    /**
     * Return a randomly selected ordered array of offsets in the given
     * half-open range.
     * <p>
     * This approach is based on a bit vector. If the bit is already marked,
     * then the offset has been used and we scan until we find the next free
     * offset. This requires [rangeCount] bits, so it works well when the
     * rangeCount of the key range is small. For example, a range count of 32k
     * requires a 4kB bit vector, which is quite manageable.
     * 
     * FIXME There is something broken in this class, probably an assumption I
     * have about how {@link LongArrayBitVector} works. If you enable it in the
     * stress test, it will fail.
     */
    static public class BitVectorOffsetSampler implements IOffsetSampler {

        /**
         * {@inheritDoc}
         * <p>
         * Note: The utility of this class is limited to smaller range counts
         * (32k is fine, 2x or 4x that is also Ok) so it will reject anything
         * with a very large range count.
         * 
         * @throws UnsupportedOperationException
         *             if the rangeCount is GT {@link Integer#MAX_VALUE}
         */
        public long[] getOffsets(final long seed, int limit,
                final long fromIndex, final long toIndex) {

            if (limit < 1)
                throw new IllegalArgumentException();

            if (fromIndex < 0)
                throw new IllegalArgumentException();

            if (toIndex < 0)
                throw new IllegalArgumentException();

            if (toIndex <= fromIndex)
                throw new IllegalArgumentException();

            final long rangeCount2 = (toIndex - fromIndex);

            if (rangeCount2 > Integer.MAX_VALUE) {
                /*
                 * The utility of this class is limited to smaller range counts
                 * so it will reject anything with a very large range count.
                 */
                throw new UnsupportedOperationException();
            }

            // known to be an int32 value.
            final int rangeCount = (int) rangeCount2;

            if (limit > rangeCount) {
                limit = rangeCount;
            }

            // offsets of tuples to visit.
            final long[] offsets = new long[limit];

            // create a cleared bit vector of the stated capacity.
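            // Note: LongArrayBitVector.ofLength(n) allocates n cleared bits.
            // Caution (possible cause of the FIXME on this class): the call
            // v.add(k, true) below follows List semantics and inserts a bit,
            // growing the vector, whereas v.set(k, true) would mark the bit
            // in place.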
            final BitVector v = LongArrayBitVector.ofLength(//
                    rangeCount// capacity (in bits)
                    );

            // Random number generator using caller's seed (if given).
            final Random rnd = seed == 0L ? new Random() : new Random(seed);

            // Choose random tuple indices for the remaining tuples.
            for (int i = 0; i < limit; i++) {

                /*
                 * Look for an unused bit starting at this index. If necessary,
                 * this will wrap around to zero.
                 */

                // k in (0:rangeCount-1).
                int k = rnd.nextInt(rangeCount);

                if (v.getBoolean((long) k)) {

                    // This bit is already taken.
                    final long nextZero = v.nextZero((long) k);

                    if (nextZero != -1L) {

                        k = (int) nextZero;

                    } else {

                        final long priorZero = v.previousZero((long) k);

                        if (priorZero != -1L) {

                            k = (int) priorZero;

                        } else {

                            // No empty bit found?
                            throw new AssertionError();

                        }

                    }

                }

                assert !v.getBoolean(k);

                // Set the bit.
                v.add(k, true);

                assert v.getBoolean(k);

                offsets[i] = fromIndex + k;

                assert offsets[i] < toIndex;

            }

            // put them into sorted order for more efficient traversal.
            Arrays.sort(offsets);

            // System.err.println(Arrays.toString(offsets));

            return offsets;

        }

    }

    /**
     * An implementation based on a hash set of the offsets which have been
     * accepted. This implementation is a good choice when the limit is
     * moderate (~100k) and the rangeCount is significantly greater than the
     * limit. The memory demand is O(limit).
     * 
     * @author thompsonbry
     */
    static public class AcceptanceSetOffsetSampler implements IOffsetSampler {

        /**
         * {@inheritDoc}
         * <p>
         * Note: The utility of this class is limited to moderate range counts
         * (~100k) so it will reject anything with a very large range count.
         * 
         * @throws UnsupportedOperationException
         *             if the rangeCount is GT {@link Integer#MAX_VALUE}
         */
        public long[] getOffsets(final long seed, int limit,
                final long fromIndex, final long toIndex) {

            if (limit < 1)
                throw new IllegalArgumentException();

            if (fromIndex < 0)
                throw new IllegalArgumentException();

            if (toIndex < 0)
                throw new IllegalArgumentException();

            if (toIndex <= fromIndex)
                throw new IllegalArgumentException();

            final long rangeCount2 = (toIndex - fromIndex);

            if (rangeCount2 > Integer.MAX_VALUE)
                throw new UnsupportedOperationException();

            final int rangeCount = (int) rangeCount2;

            if (limit > rangeCount) {
                limit = rangeCount;
            }

            // offsets of tuples to visit.
            final long[] offsets = new long[limit];

            // hash set of accepted offsets.
            final IntOpenHashSet v = new IntOpenHashSet(rangeCount// capacity
            );

            // Random number generator using caller's seed (if given).
            final Random rnd = seed == 0L ? new Random() : new Random(seed);

            // Choose random tuple indices for the remaining tuples.
            for (int i = 0; i < limit; i++) {

                /*
                 * Look for an unused offset starting at this index. If
                 * necessary, this will wrap around to zero.
                 */

                // k in (0:rangeCount-1).
                int k = rnd.nextInt(rangeCount);

                int round = 0;

                while (v.contains(k)) {

                    k++;

                    if (k == rangeCount) {

                        // wrap around.
                        if (++round > 1) {

                            // no unused offset found?
                            throw new AssertionError();

                        }

                        // reset starting index.
                        k = 0;

                    }

                }

                assert !v.contains(k);

                // Record the accepted offset.
                v.add(k);

                offsets[i] = fromIndex + k;

                assert offsets[i] < toIndex;

            }

            // put them into sorted order for more efficient traversal.
            Arrays.sort(offsets);

            // System.err.println(Arrays.toString(offsets));

            return offsets;

        }

    }

}