package com.bigdata.relation.rule.eval.pipeline;

import java.io.IOException;
import java.nio.channels.ClosedByInterruptException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.FutureTask;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IVariable;
import com.bigdata.btree.AbstractBTree;
import com.bigdata.journal.AbstractTask;
import com.bigdata.journal.ConcurrencyManager;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.IIndexStore;
import com.bigdata.journal.IJournal;
import com.bigdata.journal.ITx;
import com.bigdata.rdf.spo.SPOKeyOrder;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.AbstractUnsynchronizedArrayBuffer;
import com.bigdata.relation.accesspath.AccessPath;
import com.bigdata.relation.accesspath.BlockingBuffer;
import com.bigdata.relation.accesspath.BufferClosedException;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.accesspath.IAsynchronousIterator;
import com.bigdata.relation.accesspath.IBlockingBuffer;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.relation.accesspath.UnsynchronizedArrayBuffer;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.IStarJoin;
import com.bigdata.relation.rule.IStarJoin.IStarConstraint;
import com.bigdata.relation.rule.eval.ChunkTrace;
import com.bigdata.relation.rule.eval.IJoinNexus;
import com.bigdata.relation.rule.eval.ISolution;
import com.bigdata.service.DataService;
import com.bigdata.service.IDataService;
import com.bigdata.striterator.IChunkedOrderedIterator;
import com.bigdata.striterator.IKeyOrder;
import com.bigdata.util.BytesUtil;
import com.bigdata.util.InnerCause;
import com.bigdata.util.concurrent.LatchedExecutor;

/**
 * Consumes {@link IBindingSet} chunks from the previous join dimension.
 * <p>
 * Note: Instances of this class MUST be created on the {@link IDataService}
 * that hosts the index partition on which the task will read, and they MUST
 * run inside of an {@link AbstractTask} on the {@link ConcurrencyManager} in
 * order to have access to the local index object for the index partition.
 * <p>
 * This class is NOT serializable.
 * <p>
 * For a rule with 2 predicates, there will be two {@link JoinTask}s. The
 * {@link #orderIndex} is ZERO (0) for the first {@link JoinTask} and ONE (1)
 * for the second {@link JoinTask}. The first {@link JoinTask} will have a
 * single initialBinding from the {@link JoinMasterTask} and will read on the
 * {@link IAccessPath} for the first {@link IPredicate} in the evaluation
 * {@link #order}. The second {@link JoinTask} will read chunks of
 * {@link IBindingSet}s containing partial solutions from the first
 * {@link JoinTask} and will obtain and read on an {@link IAccessPath} for the
 * second predicate in the evaluation order for every partial solution.
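 * <p>
 * To make the indexing concrete (an illustrative sketch, not an API
 * contract): for a two-tail rule evaluated in tail order
 * <code>order = {1, 0}</code>, the pipeline is
 *
 * <pre>
 * // orderIndex=0 : reads tail[order[0]] = tail[1] using the initial
 * //                binding set from the JoinMasterTask.
 * // orderIndex=1 : reads tail[order[1]] = tail[0] once for each partial
 * //                solution received from the orderIndex=0 task, and
 * //                (being the lastJoin) writes on the solution buffer.
 * </pre>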
 * <p>
 * Since there are only two {@link IPredicate}s in the {@link IRule}, the
 * second and last {@link JoinTask} will write on the {@link ISolution}
 * buffer obtained from {@link JoinMasterTask#getSolutionBuffer()}. Each
 * {@link JoinTask} will report its {@link JoinStats} to the master, which
 * aggregates those statistics.
 * <p>
 * Note: {@link ITx#UNISOLATED} requests will deadlock if the same query uses
 * the same access path for two predicates! This is because the first such
 * join dimension in the evaluation order will obtain an exclusive lock on an
 * index partition, making it impossible for another {@link JoinTask} to
 * obtain an exclusive lock on the same index partition. This is not a
 * problem if you are using read-consistent timestamps!
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 *
 * @todo Allow the access paths to be consumed in parallel. This would let us
 *       use more threads for join dimensions that have to test more source
 *       binding sets.
 *       <p>
 *       Parallel {@link AccessPathTask} processing is useful when each
 *       {@link AccessPathTask} consumes only a small chunk and there is a
 *       large #of source binding sets to be processed. In this case,
 *       parallelism reduces the overall latency by allowing threads to
 *       progress as soon as the data can be materialized from the index.
 *       {@link AccessPathTask} parallelism is realized by submitting each
 *       {@link AccessPathTask} to a service imposing a parallelism limit on
 *       the shared {@link IIndexStore#getExecutorService()}. Since the
 *       {@link AccessPathTask}s are concurrent, each one requires its own
 *       {@link UnsynchronizedOutputBuffer} on which it will place any
 *       accepted {@link IBindingSet}s. Once an {@link AccessPathTask}
 *       completes, its buffer may be reused by the next
 *       {@link AccessPathTask} assigned to a worker thread (this reduces
 *       heap churn and allows us to assemble full chunks when each
 *       {@link IAccessPath} realizes only a few accepted
 *       {@link IBindingSet}s). For an {@link ExecutorService} with a
 *       parallelism limit of N, there are therefore N
 *       {@link UnsynchronizedOutputBuffer}s. Those buffers must be flushed
 *       when the {@link JoinTask} exhausts its source(s). If the same set of
 *       threads is not known to be reused for each {@link AccessPathTask},
 *       then the actual #of buffers will be the #of distinct threads used.
 *       To reduce the potential memory demand, striped locks could be used
 *       to protect a pool of {@link UnsynchronizedArrayBuffer}s, but that
 *       could lead to deadlock if the buffer reference was exposed to the
 *       task (as opposed to adding the object to the buffer within a private
 *       method, which hides that reference) since more than one thread could
 *       demand access to the same buffer.
 *
 * @todo Parallel {@link ChunkTask} processing may be useful when an
 *       {@link AccessPathTask} will consume a large #of chunks. Since the
 *       {@link IAccessPath#iterator()} is NOT thread-safe, reads on the
 *       {@link IAccessPath} must be sequential, but the chunks read from the
 *       {@link IAccessPath} can be placed onto a queue and parallel
 *       {@link ChunkTask}s can drain that queue, consuming the chunks. This
 *       can help by reducing the latency to materialize any given chunk.
 *       <p>
 *       The required change is to have a per-thread
 *       {@link UnsynchronizedArrayBuffer} feeding a thread-safe
 *       {@link UnsyncDistributedOutputBuffer} (potentially via a queue)
 *       which maps each generated binding set across the index partition(s)
 *       for the sink {@link JoinTask}s. A sketch of that flow appears below.
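 *       <p>
 *       A minimal sketch of that flow, assuming the hypothetical
 *       {@link UnsyncDistributedOutputBuffer} named above (only
 *       {@link UnsynchronizedArrayBuffer} exists in this class today):
 *
 *       <pre>
 *       // each worker thread accumulates into its own unsynchronized buffer
 *       UnsynchronizedArrayBuffer&lt;IBindingSet&gt; local = threadLocal.get();
 *       local.add(bindingSet);
 *       // on flush, the accumulated chunk moves onto the shared,
 *       // thread-safe sink which maps it across the target partitions.
 *       local.flush();
 *       </pre>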
 */
abstract public class JoinTask implements Callable<Void> {

    static protected final Logger log = Logger.getLogger(JoinTask.class);

    /**
     * True iff the {@link #log} level is WARN or less.
     */
    static final protected boolean WARN = log.isEnabledFor(Level.WARN);

    /**
     * True iff the {@link #log} level is INFO or less.
     */
    static final protected boolean INFO = log.isInfoEnabled();

    /**
     * True iff the {@link #log} level is DEBUG or less.
     */
    static final protected boolean DEBUG = log.isDebugEnabled();

    /** The rule that is being evaluated. */
    final protected IRule<?> rule;

    /**
     * The #of predicates in the tail of that rule.
     */
    final protected int tailCount;

    /**
     * The index partition on which this {@link JoinTask} is reading -or-
     * <code>-1</code> if the deployment does not support key-range
     * partitioned indices.
     */
    final protected int partitionId;

    /**
     * The tail index in the rule for the predicate on which we are reading
     * for this join dimension.
     */
    final protected int tailIndex;

    /**
     * The {@link IPredicate} on which we are reading for this join
     * dimension.
     */
    final protected IPredicate<?> predicate;

    /**
     * The {@link IRelation} view on which we are reading for this join
     * dimension.
     */
    final protected IRelation<?> relation;

    /**
     * The index into the evaluation {@link #order} for the predicate on
     * which we are reading for this join dimension.
     */
    final protected int orderIndex;

    /**
     * <code>true</code> iff this is the last join dimension in the
     * evaluation order.
     */
    final protected boolean lastJoin;

    /**
     * A proxy for the remote {@link JoinMasterTask}.
     */
    final protected IJoinMaster masterProxy;

    final protected UUID masterUUID;

    /**
     * A list of variables required for each tail, by tailIndex. Used to
     * filter downstream variable binding sets.
     */
    final protected IVariable<?>[][] requiredVars;

    /**
     * The {@link IJoinNexus} for the local {@link IIndexManager}, which
     * will be the live {@link IJournal}. This {@link IJoinNexus} MUST have
     * access to the local index objects, which means that this class MUST
     * be run inside of the {@link ConcurrencyManager}. The
     * {@link #joinNexus} is created from the {@link #joinNexusFactory} once
     * the task begins to execute.
     */
    protected IJoinNexus joinNexus;

    /**
     * Volatile flag is set <code>true</code> if the {@link JoinTask}
     * (including any tasks executing on its behalf) should halt. This flag
     * is monitored by the {@link BindingSetConsumerTask}, the
     * {@link AccessPathTask}, and the {@link ChunkTask}. It is set by any
     * of those tasks if they are interrupted or error out.
     *
     * @todo review handling of this flag. Should an exception always be
     *       thrown if the flag is set wrapping the {@link #firstCause}?
     *       Are there any cases where the behavior should be different?
     *       If not, then replace tests with halt() and encapsulate the
     *       logic in that method.
     */
    volatile protected boolean halt = false;

    /**
     * Set by {@link BindingSetConsumerTask}, {@link AccessPathTask}, and
     * {@link ChunkTask} if they throw an error. Tasks are required to use
     * an {@link AtomicReference#compareAndSet(Object, Object)} and must
     * specify <code>null</code> as the expected value. This ensures that
     * only the first cause is recorded by this field.
     */
    final protected AtomicReference<Throwable> firstCause = new AtomicReference<Throwable>(
            null);

    /**
     * Indicate that join processing should halt. This method is written
     * defensively and will not throw anything.
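     * <p>
     * The first-cause idiom used by this method, in miniature (the same
     * pattern is documented on {@link #firstCause}):
     *
     * <pre>
     * // Only the first caller can record the cause; later causes are ignored.
     * final boolean isFirstCause = firstCause.compareAndSet(null, cause);
     * </pre>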
     *
     * @param cause
     *            The cause.
     */
    protected void halt(final Throwable cause) {

        halt = true;

        final boolean isFirstCause = firstCause.compareAndSet(
                null/* expect */, cause);

        if (WARN)
            try {

                if (!InnerCause.isInnerCause(cause, InterruptedException.class)
                        && !InnerCause.isInnerCause(cause, CancellationException.class)
                        && !InnerCause.isInnerCause(cause, ClosedByInterruptException.class)
                        && !InnerCause.isInnerCause(cause, RejectedExecutionException.class)
                        && !InnerCause.isInnerCause(cause, BufferClosedException.class)) {

                    /*
                     * This logs all unexpected causes, not just the first
                     * one to be reported for this join task.
                     *
                     * Note: The master will log the firstCause that it
                     * receives as an error.
                     */
                    log.warn("orderIndex=" + orderIndex + ", partitionId="
                            + partitionId + ", isFirstCause=" + isFirstCause
                            + " : " + cause.getLocalizedMessage(), cause);

                }

            } catch (Throwable ex) {

                // error in logging system - ignore.

            }

    }

    /**
     * The evaluation order. {@link #orderIndex} is the index into this
     * array. The {@link #orderIndex} is zero (0) for the first join
     * dimension and is incremented by one for each subsequent join
     * dimension. The value at <code>order[orderIndex]</code> is the index
     * of the tail predicate that will be evaluated at a given
     * {@link #orderIndex}.
     */
    final int[] order;

    /**
     * The statistics for this {@link JoinTask}.
     */
    final JoinStats stats;

    /**
     * A factory pattern for per-thread objects whose life cycle is tied to
     * some container. For example, there may be an instance of this pool
     * for a {@link JoinTask} or an {@link AbstractBTree}. The pool can be
     * torn down when the container is torn down, which prevents its
     * thread-local references from escaping.
     *
     * @author thompsonbry@users.sourceforge.net
     * @param <T>
     *            The generic type of the thread-local object.
     *
     * @todo There should be two implementations of a common interface or
     *       abstract base class: one based on a private
     *       {@link ConcurrentHashMap} and the other on striped locks. The
     *       advantage of the {@link ConcurrentHashMap} is approximately 3x
     *       higher concurrency. The advantage of striped locks is that you
     *       can directly manage the #of buffers when the #of threads using
     *       those buffers is unbounded. However, doing so could lead to
     *       deadlock since two threads can be hashed onto the same buffer
     *       object.
     */
    abstract public class ThreadLocalFactory<T extends IBuffer<E>, E> {

        /**
         * The thread-local queues.
         */
        private final ConcurrentHashMap<Thread, T> map;

        /**
         * A list of all objects visible to the caller. This is used to
         * ensure that any objects allocated by the factory are visited.
         * <p>
         * Note: Since the collection is not thread-safe, synchronization is
         * required when adding to the collection and when visiting the
         * elements of the collection.
         */
        private final LinkedList<T> list = new LinkedList<T>();

        protected ThreadLocalFactory() {

            this(16/* initialCapacity */, .75f/* loadFactor */,
                    16/* concurrencyLevel */);

        }

        protected ThreadLocalFactory(final int initialCapacity,
                final float loadFactor, final int concurrencyLevel) {

            map = new ConcurrentHashMap<Thread, T>(initialCapacity,
                    loadFactor, concurrencyLevel);

        }

        /**
         * Return the #of thread-local objects.
         */
        final public int size() {

            return map.size();

        }

        /**
         * Add the element to the thread-local buffer.
         *
         * @param e
         *            An element.
         *
         * @throws IllegalStateException
         *             if the factory is asynchronously closed.
         */
        public void add(E e) {

            get().add(e);

        }

        /**
         * Return a thread-local buffer.
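         * <p>
         * The typical life cycle of the buffers returned here, as driven by
         * this {@link JoinTask} (an illustrative sketch; the methods shown
         * are the real methods of this factory):
         *
         * <pre>
         * factory.add(e);  // get() lazily binds a buffer to the calling thread
         * // ... when the join exhausts its source(s):
         * factory.flush(); // flush every thread-local buffer
         * // ... or, on error:
         * factory.reset(); // discard all buffered writes
         * </pre>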
         *
         * @return The thread-local buffer.
         *
         * @throws RuntimeException
         *             if the join is halted.
         */
        final private T get() {

            final Thread t = Thread.currentThread();

            T tmp = map.get(t);

            if (tmp == null) {

                if (map.put(t, tmp = initialValue()) != null) {

                    /*
                     * Note: Since the key is the thread it is not possible
                     * for there to be a concurrent put of an entry under
                     * the same key so we do not have to use putIfAbsent().
                     */

                    throw new AssertionError();

                }

                // Add to list.
                synchronized (list) {

                    list.add(tmp);

                }

            }

            if (halt)
                throw new RuntimeException(firstCause.get());

            return tmp;

        }

        /**
         * Flush each of the unsynchronized buffers onto their backing
         * synchronized buffer.
         *
         * @throws RuntimeException
         *             if the join is halted.
         */
        public void flush() {

            synchronized (list) {

                int n = 0;

                long m = 0L;

                for (T b : list) {

                    if (halt)
                        throw new RuntimeException(firstCause.get());

                    // #of elements to be flushed.
                    final int size = b.size();

                    // flush, returning total #of elements written onto this
                    // buffer.
                    final long counter = b.flush();

                    m += counter;

                    // count the buffers flushed (was never incremented).
                    n++;

                    if (DEBUG)
                        log.debug("Flushed buffer: size=" + size
                                + ", counter=" + counter);

                }

                if (INFO)
                    log.info("Flushed " + n
                            + " unsynchronized buffers totalling " + m
                            + " elements");

            }

        }

        /**
         * Reset each of the unsynchronized buffers, discarding their
         * buffered writes.
         * <p>
         * Note: This method is used during error processing, therefore it
         * DOES NOT check {@link JoinTask#halt}.
         */
        public void reset() {

            synchronized (list) {

                int n = 0;

                for (T b : list) {

                    // #of elements in the buffer before reset().
                    final int size = b.size();

                    // reset the buffer.
                    b.reset();

                    // count the buffers reset (was never incremented).
                    n++;

                    if (DEBUG)
                        log.debug("Reset buffer: size=" + size);

                }

                if (INFO)
                    log.info("Reset " + n + " unsynchronized buffers");

            }

        }

        /**
         * Create and return a new object.
         */
        abstract protected T initialValue();

    }

    final private ThreadLocalFactory<AbstractUnsynchronizedArrayBuffer<IBindingSet>, IBindingSet> threadLocalBufferFactory = new ThreadLocalFactory<AbstractUnsynchronizedArrayBuffer<IBindingSet>, IBindingSet>() {

        @Override
        protected AbstractUnsynchronizedArrayBuffer<IBindingSet> initialValue() {

            // new buffer created by the concrete JoinTask impl.
            return newUnsyncOutputBuffer();

        }

    };

    /**
     * A method used by the {@link #threadLocalBufferFactory} to create new
     * output buffers as required. The output buffer will be used to
     * aggregate {@link IBindingSet}s generated by this {@link JoinTask}.
     * <p>
     * Note: A different implementation class must be used depending on
     * whether or not this is the last join dimension for the query (when it
     * is, then we write on the solution buffer) and whether or not the
     * target join index is key-range partitioned (when it is, each binding
     * set is mapped across the sink {@link JoinTask}(s)).
     */
    abstract protected AbstractUnsynchronizedArrayBuffer<IBindingSet> newUnsyncOutputBuffer();

    /**
     * The buffer on which the last predicate in the evaluation order will
     * write its {@link ISolution}s.
     *
     * @return The buffer.
     *
     * @throws IllegalStateException
     *             unless {@link #lastJoin} is <code>true</code>.
     */
    abstract protected IBuffer<ISolution[]> getSolutionBuffer();

    /**
     * Return the index of the tail predicate to be evaluated at the given
     * index in the evaluation order.
     *
     * @param orderIndex
     *            The evaluation order index.
     *
     * @return The tail index to be evaluated at that index in the
     *         evaluation order.
     */
    final protected int getTailIndex(final int orderIndex) {

        assert order != null;

        // note: the range check must precede the array access.
        assert orderIndex >= 0 && orderIndex < tailCount : "orderIndex="
                + orderIndex + ", rule=" + rule;

        final int tailIndex = order[orderIndex];

        return tailIndex;

    }

    public String toString() {

        return getClass().getName() + "{ orderIndex=" + orderIndex
                + ", partitionId=" + partitionId + ", lastJoin=" + lastJoin
                + ", masterUUID=" + masterUUID + "}";

    }

    /**
     * Instances of this class MUST be created in the appropriate execution
     * context of the target {@link DataService} so that the federation and
     * the joinNexus references are both correct and so that it has access
     * to the local index object for the specified index partition.
     *
     * @param rule
     * @param joinNexus
     * @param order
     * @param orderIndex
     * @param partitionId
     *            The index partition identifier and <code>-1</code> if the
     *            deployment does not support key-range partitioned indices.
     * @param masterProxy
     * @param masterUUID
     * @param requiredVars
     *
     * @see JoinTaskFactoryTask
     */
    public JoinTask(/* final String indexName, */final IRule rule,
            final IJoinNexus joinNexus, final int[] order,
            final int orderIndex, final int partitionId,
            final IJoinMaster masterProxy, final UUID masterUUID,
            final IVariable[][] requiredVars) {

        if (rule == null)
            throw new IllegalArgumentException();
        if (joinNexus == null)
            throw new IllegalArgumentException();
        final int tailCount = rule.getTailCount();
        if (order == null)
            throw new IllegalArgumentException();
        if (order.length != tailCount)
            throw new IllegalArgumentException();
        if (orderIndex < 0 || orderIndex >= tailCount)
            throw new IllegalArgumentException();
        if (masterProxy == null)
            throw new IllegalArgumentException();
        if (masterUUID == null)
            throw new IllegalArgumentException();
        if (requiredVars == null)
            throw new IllegalArgumentException();

        this.rule = rule;
        this.partitionId = partitionId;
        this.tailCount = tailCount;
        this.orderIndex = orderIndex;
        this.joinNexus = joinNexus;
        this.order = order; // note: assign before using getTailIndex()
        this.tailIndex = getTailIndex(orderIndex);
        this.lastJoin = ((orderIndex + 1) == tailCount);
        this.predicate = rule.getTail(tailIndex);
        this.relation = joinNexus.getTailRelationView(predicate);
        this.stats = new JoinStats(partitionId, orderIndex);
        this.masterProxy = masterProxy;
        this.masterUUID = masterUUID;
        this.requiredVars = requiredVars;

        if (DEBUG)
            log.debug("orderIndex=" + orderIndex + ", partitionId="
                    + partitionId);

    }

    /**
     * Runs the {@link JoinTask}.
     *
     * @return <code>null</code>.
     */
    public Void call() throws Exception {

        if (DEBUG)
            log.debug("orderIndex=" + orderIndex + ", partitionId="
                    + partitionId);

        try {

            /*
             * Consume bindingSet chunks from the source JoinTask(s).
             */
            consumeSources();

            /*
             * Flush and close output buffers and wait for all sink
             * JoinTasks to complete.
             */

            // flush the unsync buffers.
            threadLocalBufferFactory.flush();

            // flush the sync buffer and await the sink JoinTasks
            flushAndCloseBuffersAndAwaitSinks();

            if (DEBUG)
                log.debug("JoinTask done: orderIndex=" + orderIndex
                        + ", partitionId=" + partitionId + ", halt=" + halt
                        + ", firstCause=" + firstCause.get());

            if (halt)
                throw new RuntimeException(firstCause.get());

            return null;

        } catch (Throwable t) {

            try {
                logCallError(t);
            } catch (Throwable t2) {
                log.error(t2.getLocalizedMessage(), t2);
            }

            /*
             * This is used for processing errors and also if this task is
             * interrupted (because a SLICE has been satisfied).
             *
             * @todo For a SLICE, consider that the query solution buffer
             * proxy could return the #of solutions added so far so that we
             * can halt each join task on the last join dimension in a
             * relatively timely manner producing no more than one chunk too
             * many (actually, it might not be that timely since some index
             * partitions might not produce any solutions; this suggests
             * that the master might need a fatter API than a Future for the
             * JoinTask so that it can directly notify the JoinTasks for the
             * first predicate and they can propagate that notice downstream
             * to their sinks). This will be an issue when fanOut GT ONE.
             */
            halt(t);

            // reset the unsync buffers.
            try {
                threadLocalBufferFactory.reset();
            } catch (Throwable t2) {
                log.error(t2.getLocalizedMessage(), t2);
            }

            // reset the sync buffer and cancel the sink JoinTasks.
            try {
                cancelSinks();
            } catch (Throwable t2) {
                log.error(t2.getLocalizedMessage(), t2);
            }

            // report join stats _before_ we close our source(s).
            try {
                reportOnce();
            } catch (Throwable t2) {
                log.error(t2.getLocalizedMessage(), t2);
            }

            /*
             * Close source iterators, which will cause any source JoinTasks
             * that are still executing to throw a CancellationException
             * when the Future associated with the source iterator is
             * cancelled.
             */
            try {
                closeSources();
            } catch (Throwable t2) {
                log.error(t2.getLocalizedMessage(), t2);
            }

            throw new RuntimeException(t);

        } finally {

            // report join stats iff they have not already been reported.
            reportOnce();

        }

    }

    /**
     * Method is used to log the primary exception thrown by
     * {@link #call()}. The default implementation does nothing and the
     * exception will be logged by the {@link JoinMasterTask}. However, this
     * method is overridden by {@link DistributedJoinTask} so that the
     * exception can be logged on the host and {@link DataService} where it
     * originates. This appears to be necessary in order to trace back the
     * cause of an exception which can otherwise be obscured (or even lost?)
     * in a deeply nested RMI stack trace.
     *
     * @param t
     *            The exception.
     */
    protected void logCallError(Throwable t) {

    }

    /**
     * Method reports {@link JoinStats} to the {@link JoinMasterTask}, but
     * only if they have not already been reported. This "report once"
     * constraint is used to make it safe to invoke during error handling
     * before actions which could cause the source {@link JoinTask}s (and
     * hence the {@link JoinMasterTask}) to terminate.
     */
    protected void reportOnce() {

        if (!didReport) {

            didReport = true;

            try {

                // report statistics to the master.
                masterProxy.report(stats);

            } catch (IOException ex) {

                log.warn("Could not report statistics to the master", ex);

            }

        }

    }

    private boolean didReport = false;

    /**
     * Consume {@link IBindingSet} chunks from source(s). The first join
     * dimension always has a single source - the initialBindingSet
     * established by the {@link JoinMasterTask}.
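     * <p>
     * The consumption loop itself is simple; what varies per subclass is
     * where {@link #nextChunk()} reads from (see
     * {@link BindingSetConsumerTask#call()} for the actual loop):
     *
     * <pre>
     * IBindingSet[] chunk;
     * while (!halt &amp;&amp; (chunk = nextChunk()) != null) {
     *     // generate, reorder, and run one AccessPathTask per distinct
     *     // asBound predicate licensed by the chunk.
     * }
     * </pre>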
     * <p>
     * Downstream join dimensions read from {@link IAsynchronousIterator}(s)
     * from the upstream join dimension. When the {@link IIndexManager}
     * allows key-range partitions, then the fan-in for the sources may be
     * larger than one as there will be one {@link JoinTask} for each index
     * partition touched by each join dimension.
     *
     * @throws Exception
     * @throws BufferClosedException
     *             if there is an attempt to output a chunk of
     *             {@link IBindingSet}s or {@link ISolution}s and the output
     *             buffer is an {@link IBlockingBuffer} (true for all join
     *             dimensions except the lastJoin and also true for query on
     *             the lastJoin) and that {@link IBlockingBuffer} has been
     *             closed.
     */
    protected void consumeSources() throws Exception {

        if (INFO)
            log.info(toString());

        /*
         * The maximum parallelism with which the {@link JoinTask} will
         * consume the source {@link IBindingSet}s.
         *
         * Note: When ZERO (0), everything will run in the caller's
         * {@link Thread}. When GT ZERO (0), tasks will run on an
         * {@link ExecutorService} with the specified maximum parallelism.
         *
         * Note: even when maxParallel is zero there will be one thread per
         * join dimension. For many queries that may be just fine.
         *
         * FIXME parallel execution requires some thread-local
         * unsynchronized buffers -- see my notes elsewhere in this class
         * for what has to be done to support this (actually, it all appears
         * to work just fine).
         */
        final int maxParallel = 0;
        // final int maxParallel = joinNexus.getMaxParallelSubqueries();

        /*
         * Note: There is no reason for parallelism in the first join
         * dimension as there will be only a single source bindingSet and
         * hence a single AccessPathTask so the Executor is just overhead.
         *
         * @todo this will not be true when we support binding set joins as
         * the input could be a stream of binding sets (basically, when the
         * first join dimension is a subrule, it can have lots of access
         * path tasks).
         */
        if (orderIndex > 0 && maxParallel > 0) {

            // the sharedService.
            final ExecutorService sharedService = joinNexus
                    .getIndexManager().getExecutorService();

            // impose a parallelism limit on the shared service.
            final Executor limitedService = new LatchedExecutor(
                    sharedService, maxParallel);

            /*
             * consume chunks until done (using caller's thread to consume
             * and service to run subtasks).
             */
            new BindingSetConsumerTask(limitedService).call();

            if (halt)
                throw new RuntimeException(firstCause.get());

        } else {

            /*
             * consume chunks until done using the caller's thread and run
             * subtasks in the caller's thread as well.
             */
            new BindingSetConsumerTask(null/* noService */).call();

        }

    }

    /**
     * Close any source {@link IAsynchronousIterator}(s). This method is
     * invoked when a {@link JoinTask} fails.
     */
    abstract void closeSources();
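    /*
     * Illustrative sketch only (the real implementations live in the
     * concrete subclasses, e.g. DistributedJoinTask): closeSources()
     * typically just closes each source iterator so that anything blocked
     * on a source terminates promptly.
     *
     *     for (IAsynchronousIterator<IBindingSet[]> src : sources)
     *         src.close();
     */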
    /**
     * Flush and close all output buffers and await sink {@link JoinTask}(s).
     * <p>
     * Note: You MUST close the {@link BlockingBuffer} from which each sink
     * reads <em>before</em> invoking this method in order for those sinks
     * to terminate. Otherwise the source {@link IAsynchronousIterator}(s)
     * on which the sink is reading will remain open and the sink will never
     * decide that it has exhausted its source(s).
     *
     * @throws InterruptedException
     * @throws ExecutionException
     */
    abstract protected void flushAndCloseBuffersAndAwaitSinks()
            throws InterruptedException, ExecutionException;

    /**
     * Cancel sink {@link JoinTask}(s).
     */
    abstract protected void cancelSinks();

    /**
     * Return a chunk of {@link IBindingSet}s from the
     * {@link IAsynchronousIterator}s. The 1st join dimension is always fed
     * by the {@link JoinMasterTask}. The nth+1 join dimension is always fed
     * by the nth {@link JoinTask}(s).
     *
     * @return The next available chunk of {@link IBindingSet}s -or-
     *         <code>null</code> IFF all known source(s) are exhausted.
     */
    abstract protected IBindingSet[] nextChunk() throws InterruptedException;

    /**
     * Class consumes chunks from the source(s) until canceled, interrupted,
     * or all source(s) are exhausted. For each distinct asBound predicate
     * generated from the {@link IBindingSet}s in each chunk, an
     * {@link AccessPathTask} is created which will consume the
     * {@link IBindingSet}s licensing that predicate. The
     * {@link AccessPathTask}s for a given source chunk are sorted based on
     * their <code>fromKey</code> so as to order the execution of those
     * tasks in a manner that will maximize the efficiency of index reads.
     * The ordered {@link AccessPathTask}s are then submitted to the
     * caller's {@link Executor}.
     *
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     * @version $Id$
     */
    protected class BindingSetConsumerTask implements Callable<Void> {

        private final Executor executor;

        /**
         * @param executor
         *            The service that will execute the generated
         *            {@link AccessPathTask}s -or- <code>null</code> IFF you
         *            want the {@link AccessPathTask}s to be executed in the
         *            caller's thread.
         */
        public BindingSetConsumerTask(final Executor executor) {

            this.executor = executor;

        }

        /**
         * Read chunks from one or more sources until canceled, interrupted,
         * or all sources are exhausted, and submit {@link AccessPathTask}s
         * to the caller's {@link ExecutorService} -or- execute those tasks
         * in the caller's thread if no {@link ExecutorService} was provided
         * to the ctor.
         * <p>
         * Note: When running with an {@link ExecutorService}, the caller is
         * responsible for waiting on that {@link ExecutorService} until the
         * {@link AccessPathTask}s complete and must verify that all tasks
         * completed successfully.
         *
         * @return <code>null</code>
         *
         * @throws BufferClosedException
         *             if there is an attempt to output a chunk of
         *             {@link IBindingSet}s or {@link ISolution}s and the
         *             output buffer is an {@link IBlockingBuffer} (true for
         *             all join dimensions except the lastJoin and also true
         *             for query on the lastJoin) and that
         *             {@link IBlockingBuffer} has been closed.
         */
        public Void call() throws Exception {

            try {

                if (DEBUG)
                    log.debug("begin: orderIndex=" + orderIndex
                            + ", partitionId=" + partitionId);

                IBindingSet[] chunk;

                while (!halt && (chunk = nextChunk()) != null) {

                    /*
                     * @todo ChunkTrace for bindingSet chunks in as well as
                     * access path chunks consumed.
                     */

                    if (DEBUG)
                        log.debug("Read chunk of bindings: chunkSize="
                                + chunk.length + ", orderIndex="
                                + orderIndex + ", partitionId="
                                + partitionId);

                    /*
                     * Aggregate the source bindingSets that license the
                     * same asBound predicate.
                     */
                    final Map<IPredicate<?>, Collection<IBindingSet>> map = combineBindingSets(chunk);

                    /*
                     * Generate an AccessPathTask from each distinct asBound
                     * predicate that will consume all of the source
                     * bindingSets in the chunk which resulted in the same
                     * asBound predicate.
                     */
                    final AccessPathTask[] tasks = getAccessPathTasks(map);

                    /*
                     * Reorder those tasks for better index read
                     * performance.
                     */
                    reorderTasks(tasks);

                    /*
                     * Execute the tasks (either in the caller's thread or
                     * on the supplied service).
                     */
                    executeTasks(tasks);

                }

                if (halt)
                    throw new RuntimeException(firstCause.get());

                if (DEBUG)
                    log.debug("done: orderIndex=" + orderIndex
                            + ", partitionId=" + partitionId);

                return null;

            } catch (Throwable t) {

                halt(t);

                throw new RuntimeException(t);

            }

        }

        /**
         * Populates a map of asBound predicates paired to a set of
         * bindingSets.
         * <p>
         * Note: The {@link AccessPathTask} will apply each bindingSet to
         * each element visited by the {@link IAccessPath} obtained for the
         * asBound {@link IPredicate}. This has the natural consequence of
         * eliminating subqueries within the chunk.
         *
         * @param chunk
         *            A chunk of bindingSets from the source join dimension.
         *
         * @return A map which pairs the distinct asBound predicates to the
         *         bindingSets in the chunk from which the predicate was
         *         generated.
         */
        protected Map<IPredicate<?>, Collection<IBindingSet>> combineBindingSets(
                final IBindingSet[] chunk) {

            if (DEBUG)
                log.debug("chunkSize=" + chunk.length);

            final int tailIndex = getTailIndex(orderIndex);

            final Map<IPredicate<?>, Collection<IBindingSet>> map = new LinkedHashMap<IPredicate<?>, Collection<IBindingSet>>(
                    chunk.length);

            for (IBindingSet bindingSet : chunk) {

                if (halt)
                    throw new RuntimeException(firstCause.get());

                // constrain the predicate to the given bindings.
                IPredicate<?> predicate = rule.getTail(tailIndex).asBound(
                        bindingSet);

                if (partitionId != -1) {

                    /*
                     * Constrain the predicate to the desired index
                     * partition.
                     *
                     * Note: we do this for scale-out joins since the access
                     * path will be evaluated by a JoinTask dedicated to
                     * this index partition, which is part of how the
                     * JoinTask gains access to the local index object for
                     * an index partition.
                     */
                    predicate = predicate.setPartitionId(partitionId);

                }

                // lookup the asBound predicate in the map.
                Collection<IBindingSet> values = map.get(predicate);

                if (values == null) {

                    /*
                     * This is the first bindingSet for this asBound
                     * predicate. We create a collection of bindingSets to
                     * be paired with that predicate and put the collection
                     * into the map using that predicate as the key.
                     */

                    values = new LinkedList<IBindingSet>();

                    map.put(predicate, values);

                } else {

                    // more than one bindingSet will use the same access
                    // path.
                    stats.accessPathDups++;

                }

                /*
                 * Add the bindingSet to the collection of bindingSets
                 * paired with the asBound predicate.
                 */
                values.add(bindingSet);

            }

            if (DEBUG)
                log.debug("chunkSize=" + chunk.length
                        + ", #distinct predicates=" + map.size());

            return map;

        }

        /**
         * Creates an {@link AccessPathTask} for each distinct asBound
         * predicate in the given map, pairing it with the
         * {@link IBindingSet}s which licensed that predicate.
         *
         * @param map
         *            A map from the distinct asBound predicates to the
         *            {@link IBindingSet}s (from one or more source
         *            {@link JoinTask}s) which licensed those predicates.
         *
         * @return An array of {@link AccessPathTask}s (the caller imposes
         *         the desired execution order).
         */
        protected AccessPathTask[] getAccessPathTasks(
                final Map<IPredicate<?>, Collection<IBindingSet>> map) {

            final int n = map.size();

            if (DEBUG)
                log.debug("#distinct predicates=" + n);

            final AccessPathTask[] tasks = new AccessPathTask[n];

            final Iterator<Map.Entry<IPredicate<?>, Collection<IBindingSet>>> itr = map
                    .entrySet().iterator();

            int i = 0;

            while (itr.hasNext()) {

                if (halt)
                    throw new RuntimeException(firstCause.get());

                final Map.Entry<IPredicate<?>, Collection<IBindingSet>> entry = itr
                        .next();

                tasks[i++] = new AccessPathTask(entry.getKey(), entry
                        .getValue());

            }

            return tasks;

        }

        /**
         * The tasks are ordered based on the <i>fromKey</i> for the
         * associated {@link IAccessPath} as licensed by each
         * {@link IBindingSet}. This order tends to focus the reads on the
         * same parts of the index partitions with a steady progression in
         * the <i>fromKey</i> as we process a chunk of {@link IBindingSet}s.
         *
         * @param tasks
         *            The tasks.
         */
        protected void reorderTasks(final AccessPathTask[] tasks) {

            // @todo layered access paths do not expose a fromKey.
            if (tasks[0].accessPath instanceof AccessPath<?>) {

                // reorder the tasks.
                Arrays.sort(tasks);

            }

        }

        /**
         * Either execute the tasks in the caller's thread or schedule them
         * for execution on the supplied service.
         *
         * @param tasks
         *            The tasks.
         *
         * @throws Exception
         */
        protected void executeTasks(final AccessPathTask[] tasks)
                throws Exception {

            if (executor == null) {

                /*
                 * No Executor, so run each task in the caller's thread.
                 */
                for (AccessPathTask task : tasks) {

                    task.call();

                }

                return;

            }

            /*
             * Build a list of FutureTasks. This list is used to check all
             * tasks for errors and ensure that any running tasks are
             * cancelled.
             */
            final List<FutureTask<Void>> futureTasks = new LinkedList<FutureTask<Void>>();

            for (AccessPathTask task : tasks) {

                final FutureTask<Void> ft = new FutureTask<Void>(task);

                futureTasks.add(ft);

            }

            try {

                /*
                 * Execute all tasks.
                 */
                for (FutureTask<Void> ft : futureTasks) {

                    if (halt)
                        throw new RuntimeException(firstCause.get());

                    // Queue for execution.
                    executor.execute(ft);

                } // next task.

                /*
                 * Wait for each task. If any task throws an exception, then
                 * [halt] will become true and any running tasks will error
                 * out quickly.
                 * Once [halt := true], we do not wait for any more tasks,
                 * but proceed to cancel all tasks in the finally {} clause
                 * below.
                 */
                for (FutureTask<Void> ft : futureTasks) {

                    // Wait for a task.
                    if (!halt)
                        ft.get();

                }

            } finally {

                /*
                 * Ensure that all tasks are cancelled, regardless of
                 * whether they were started or have already finished.
                 */
                for (FutureTask<Void> ft : futureTasks) {

                    ft.cancel(true/* mayInterruptIfRunning */);

                }

            }

        }

    }

    /**
     * Accepts an asBound {@link IPredicate} and a (non-empty) collection of
     * {@link IBindingSet}s, each of which licenses the same asBound
     * predicate for the current join dimension. The task obtains the
     * corresponding {@link IAccessPath} and delegates each chunk visited on
     * that {@link IAccessPath} to a {@link ChunkTask}. Note that optionals
     * are also handled by this task.
     *
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     * @version $Id$
     */
    protected class AccessPathTask implements Callable<Void>,
            Comparable<AccessPathTask> {

        /**
         * The {@link IBindingSet}s from the source join dimension to be
         * combined with each element visited on the {@link #accessPath}.
         * If there is only a single source {@link IBindingSet} in a given
         * chunk of source {@link IBindingSet}s that results in the same
         * asBound {@link IPredicate} then this will be a collection with a
         * single member. However, if multiple source {@link IBindingSet}s
         * result in the same asBound {@link IPredicate} within the same
         * chunk then those are aggregated and appear together in this
         * collection.
         * <p>
         * Note: An array is used for thread-safe traversal.
         */
        final private IBindingSet[] bindingSets;

        /**
         * The {@link IAccessPath} corresponding to the asBound
         * {@link IPredicate} for this join dimension. The asBound
         * {@link IPredicate} is {@link IAccessPath#getPredicate()}.
         */
        final private IAccessPath<?> accessPath;

        /**
         * Return the <em>fromKey</em> for the {@link IAccessPath} generated
         * from the {@link IBindingSet} for this task.
         *
         * @todo layered access paths do not expose a fromKey. This
         *       information is always available from the
         *       {@link SPOKeyOrder} and that method will be raised into the
         *       {@link IKeyOrder}. Unfortunately, for RDF we also need to
         *       know if triples or quads are being used, which is a
         *       property on the container or the relation.
         */
        protected byte[] getFromKey() {

            return ((AccessPath<?>) accessPath).getFromKey();

        }

        /**
         * Return <code>true</code> iff the tasks are equivalent (same
         * asBound predicate). This test may be used to eliminate duplicates
         * that arise when different source {@link JoinTask}s generate the
         * same {@link IBindingSet}.
         *
         * @param o
         *            Another task.
         *
         * @return <code>true</code> iff the asBound predicate is equals().
         */
        public boolean equals(final AccessPathTask o) {

            return accessPath.getPredicate().equals(
                    o.accessPath.getPredicate());

        }

        /**
         * Evaluate an {@link IBindingSet} for the join dimension. When the
         * task runs, it will pair each element visited on the
         * {@link IAccessPath} with the asBound {@link IPredicate}. For each
         * element visited, if the binding is acceptable for the constraints
         * on the asBound {@link IPredicate}, then the task will emit one
         * {@link IBindingSet} for each source {@link IBindingSet}.
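         * <p>
         * For example (illustrative values): if the source binding sets
         * <code>{x=a, y=b1}</code> and <code>{x=a, y=b2}</code> both
         * license the asBound predicate <code>pred(a, ?z)</code>, then a
         * single {@link AccessPathTask} reads that access path once and
         * joins each element visited against both binding sets.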
         *
         * @param predicate
         *            The asBound {@link IPredicate}.
         * @param bindingSets
         *            A collection of {@link IBindingSet}s from the source
         *            join dimension that all result in the same asBound
         *            {@link IPredicate}.
         */
        public AccessPathTask(final IPredicate<?> predicate,
                final Collection<IBindingSet> bindingSets) {

            if (predicate == null)
                throw new IllegalArgumentException();

            if (bindingSets == null)
                throw new IllegalArgumentException();

            /*
             * Note: this needs to be the access path for the local index
             * partition. We handle this by (a) constraining the predicate
             * to the desired index partition; (b) using an IJoinNexus that
             * is initialized once the JoinTask starts to execute inside of
             * the ConcurrencyManager; and (c) declaring and using the index
             * partition name, NOT the scale-out index name.
             */

            final int n = bindingSets.size();

            if (n == 0)
                throw new IllegalArgumentException();

            this.accessPath = joinNexus.getTailAccessPath(relation,
                    predicate);

            if (DEBUG)
                log.debug("orderIndex=" + orderIndex + ", tailIndex="
                        + tailIndex + ", tail=" + rule.getTail(tailIndex)
                        + ", #bindingSets=" + n + ", accessPath="
                        + accessPath);

            // convert to array for thread-safe traversal.
            this.bindingSets = bindingSets.toArray(new IBindingSet[n]);

        }

        public String toString() {

            return JoinTask.this.getClass().getSimpleName()
                    + "{ orderIndex=" + orderIndex + ", partitionId="
                    + partitionId + ", #bindingSets=" + bindingSets.length
                    + "}";

        }

        /**
         * Evaluate the {@link #accessPath} against the
         * {@link #bindingSets}. If nothing is accepted and
         * {@link IPredicate#isOptional()} then the {@link #bindingSets} is
         * output anyway (this implements the semantics of OPTIONAL).
         *
         * @return <code>null</code>.
         *
         * @throws BufferClosedException
         *             if there is an attempt to output a chunk of
         *             {@link IBindingSet}s or {@link ISolution}s and the
         *             output buffer is an {@link IBlockingBuffer} (true for
         *             all join dimensions except the lastJoin and also true
         *             for query on the lastJoin) and that
         *             {@link IBlockingBuffer} has been closed.
         */
        public Void call() throws Exception {

            if (halt)
                throw new RuntimeException(firstCause.get());

            stats.accessPathCount++;

            if (accessPath.getPredicate() instanceof IStarJoin<?>) {

                handleStarJoin();

            } else {

                handleJoin();

            }

            return null;

        }

        /**
         * A vectored pipeline join (chunk at a time processing).
         */
        protected void handleJoin() {

            boolean nothingAccepted = true;

            // Obtain the iterator for the current join dimension.
            final IChunkedOrderedIterator<?> itr = accessPath.iterator();

            try {

                /*
                 * @todo In order to run the chunks on a thread pool, pass
                 * in [null] for the unsyncBuffer and each chunk will get
                 * its own buffer.
                 */
                final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer = threadLocalBufferFactory
                        .get();

                while (itr.hasNext()) {

                    final Object[] chunk = itr.nextChunk();

                    stats.chunkCount++;

                    // process the chunk in the caller's thread.
                    final boolean somethingAccepted = new ChunkTask(
                            bindingSets, unsyncBuffer, chunk).call();

                    if (somethingAccepted) {

                        // something in the chunk was accepted.
                        nothingAccepted = false;

                    }

                } // next chunk.

                if (nothingAccepted && predicate.isOptional()) {

                    /*
                     * Note: when NO binding sets were accepted AND the
                     * predicate is OPTIONAL then we output the _original_
                     * binding set(s) to the sink join task(s).
                     */
                    for (IBindingSet bs : this.bindingSets) {

                        unsyncBuffer.add(bs);

                    }

                }

                return;

            } catch (Throwable t) {

                halt(t);

                throw new RuntimeException(t);

            } finally {

                itr.close();

            }

        }

        protected void handleStarJoin() {

            IBindingSet[] solutions = this.bindingSets;

            final IStarJoin starJoin = (IStarJoin) accessPath
                    .getPredicate();

            final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer = threadLocalBufferFactory
                    .get();

            // Obtain the iterator for the current join dimension.
            final IChunkedOrderedIterator<?> itr = accessPath.iterator();

            // The actual #of elements scanned.
            int numElements = 0;

            try {

                /*
                 * Note: The fast range count would give us an upper bound,
                 * unless expanders are used, in which case there can be
                 * more elements visited.
                 */
                final Object[] elements;
                {

                    /*
                     * First, gather all chunks.
                     */
                    int nchunks = 0;

                    final List<Object[]> chunks = new LinkedList<Object[]>();

                    while (itr.hasNext()) {

                        final Object[] chunk = (Object[]) itr.nextChunk();

                        // add to list of chunks.
                        chunks.add(chunk);

                        numElements += chunk.length;

                        stats.chunkCount++;

                        nchunks++;

                    } // next chunk.

                    /*
                     * Now flatten the chunks into a simple array.
                     */
                    if (nchunks == 0) {

                        // No match.
                        return;

                    }

                    if (nchunks == 1) {

                        // A single chunk.
                        elements = chunks.get(0);

                    } else {

                        // Flatten the chunks.
                        elements = new Object[numElements];

                        {

                            int n = 0;

                            for (Object[] chunk : chunks) {

                                System.arraycopy(chunk/* src */,
                                        0/* srcPos */, elements/* dst */,
                                        n/* dstPos */, chunk.length/* len */);

                                n += chunk.length;

                            }

                        }

                    }

                    stats.elementCount += numElements;

                }

                if (numElements > 0) {

                    final Iterator<IStarConstraint<?>> it = starJoin
                            .getStarConstraints();

                    boolean constraintFailed = false;

                    while (it.hasNext()) {

                        final IStarConstraint constraint = it.next();

                        Collection<IBindingSet> constraintSolutions = null;

                        int numVars = constraint.getNumVars();

                        for (int i = 0; i < numElements; i++) {

                            Object e = elements[i];

                            if (constraint.isMatch(e)) {

                                /*
                                 * For each match for the constraint, we
                                 * clone the old solutions and create new
                                 * solutions that append the variable
                                 * bindings from this match.
                                 *
                                 * At the end, we set the old solutions
                                 * collection to the new solutions
                                 * collection.
                                 */

                                if (constraintSolutions == null) {

                                    constraintSolutions = new LinkedList<IBindingSet>();

                                }

                                for (IBindingSet bs : solutions) {

                                    if (numVars > 0) {

                                        bs = bs.clone();

                                        constraint.bind(bs, e);

                                    }

                                    constraintSolutions.add(bs);

                                }

                                /*
                                 * No reason to keep testing SPOs: when
                                 * there are no variables there can be only
                                 * one match.
                                 */
                                if (numVars == 0) {

                                    break;

                                }

                            }

                        }

                        if (constraintSolutions == null) {

                            /*
                             * We did not find any matches for this
                             * constraint. That is ok, as long as the
                             * constraint is optional.
                             */
                            if (!constraint.isOptional()) {

                                constraintFailed = true;

                                break;

                            }

                        } else {

                            /*
                             * Set the old solutions to the new solutions,
                             * and move on to the next constraint.
                             */
                            solutions = constraintSolutions
                                    .toArray(new IBindingSet[constraintSolutions
                                            .size()]);

                        }

                    }

                    if (!constraintFailed) {

                        for (IBindingSet bs : solutions) {

                            unsyncBuffer.add(bs);

                        }

                    }

                }

                return;

            } catch (Throwable t) {

                halt(t);

                throw new RuntimeException(t);

            } finally {

                itr.close();

            }

        }

        /**
         * Imposes an order based on the <em>fromKey</em> for the
         * {@link IAccessPath} associated with the task.
         *
         * @param o
         *            Another task.
         *
         * @return As per {@link Comparable#compareTo(Object)}.
         */
        public int compareTo(final AccessPathTask o) {

            return BytesUtil.compareBytes(getFromKey(), o.getFromKey());

        }

    }

    /**
     * Task processes a chunk of elements read from the {@link IAccessPath}
     * for a join dimension. Each element in the chunk is paired with a copy
     * of the given bindings. If that {@link IBindingSet} is accepted by the
     * {@link IRule}, then the {@link IBindingSet} will be output. The
     * {@link IBindingSet}s to be output are buffered into chunks and the
     * chunks added to the {@link JoinPipelineTask#bindingSetBuffers} for
     * the corresponding predicate.
     *
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     * @version $Id$
     */
    protected class ChunkTask implements Callable<Boolean> {

        /**
         * The index of the predicate for the access path that is being
         * consumed.
         */
        private final int tailIndex;

        /**
         * The {@link IBindingSet}s with which each element in the chunk
         * will be paired to create {@link IBindingSet}s for the downstream
         * join dimension.
         */
        private final IBindingSet[] bindingSets;

        /**
         * A per-{@link Thread} buffer that is used to collect
         * {@link IBindingSet}s into chunks before handing them off to the
         * next join dimension. The hand-off occurs no later than when the
         * current join dimension finishes consuming its source(s).
         */
        private final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer;

        /**
         * A chunk of elements read from the {@link IAccessPath} for the
         * current join dimension.
         */
        private final Object[] chunk;

        /**
         * @param bindingSet
         *            The bindings with which each element in the chunk will
         *            be paired to create the bindings for the downstream
         *            join dimension.
         * @param unsyncBuffer
         *            A per-{@link Thread} buffer used to accumulate chunks
         *            of generated {@link IBindingSet}s (optional). When the
         *            {@link ChunkTask} will be run in its own thread, pass
         *            <code>null</code> and the buffer will be obtained in
         *            {@link #call()}.
         * @param chunk
         *            A chunk of elements read from the {@link IAccessPath}
         *            for the current join dimension.
         */
        public ChunkTask(
                final IBindingSet[] bindingSet,
                final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer,
                final Object[] chunk) {

            if (bindingSet == null)
                throw new IllegalArgumentException();

            // Note: null is allowed for the unsyncBuffer (see above).

            if (chunk == null)
                throw new IllegalArgumentException();

            this.tailIndex = getTailIndex(orderIndex);

            this.bindingSets = bindingSet;

            this.chunk = chunk;

            this.unsyncBuffer = unsyncBuffer;

        }

        /**
         * @return <code>true</code> iff at least one element in the chunk
         *         (as read from the access path by the caller) was accepted
         *         when combined with the {@link #bindingSets} from the
         *         source {@link JoinTask}.
         *
         * @throws BufferClosedException
         *             if there is an attempt to output a chunk of
         *             {@link IBindingSet}s or {@link ISolution}s and the
         *             output buffer is an {@link IBlockingBuffer} (true for
         *             all join dimensions except the lastJoin and also true
         *             for query on the lastJoin) and that
         *             {@link IBlockingBuffer} has been closed.
         */
        public Boolean call() throws Exception {

            try {

                ChunkTrace.chunk(orderIndex, chunk);

                boolean nothingAccepted = true;

                // Use the caller's buffer or obtain our own as necessary.
                final AbstractUnsynchronizedArrayBuffer<IBindingSet> unsyncBuffer = (this.unsyncBuffer == null) ? threadLocalBufferFactory
                        .get()
                        : this.unsyncBuffer;

                for (Object e : chunk) {

                    if (halt)
                        // note: the return convention is "something was
                        // accepted".
                        return !nothingAccepted;

                    // naccepted for the current element (trace only).
                    int naccepted = 0;

                    stats.elementCount++;

                    for (IBindingSet bset : bindingSets) {

                        final IVariable<?>[] variablesToKeep = requiredVars[tailIndex];

                        if (INFO) {
                            log.info("tailIndex: " + tailIndex);
                            log.info("bset before: " + bset);
                        }

                        /*
                         * Clone the binding set since it is tested for each
                         * element visited.
                         */
                        bset = bset.clone();

                        if (INFO) {
                            log.info("tailIndex: " + tailIndex);
                            log.info("bset after: " + bset);
                            log.info("element: " + e);
                        }

                        // propagate bindings from the visited element.
                        if (joinNexus.bind(rule, tailIndex, e, bset)) {

                            bset = bset.copy(variablesToKeep);

                            /*
                             * Accept this binding set.
                             *
                             * @todo This is the place to intervene for
                             * scale-out default graph queries. Instead of
                             * directly accepting the bset, place the
                             * (bset,e) pair on a queue which targets a
                             * distributed hash map imposing distinct on [e]
                             * and only insert into the unsyncBuffer those
                             * [bset]s which pass the filter.
                             *
                             * The life cycle of that filter needs to be
                             * protected with a latch or zlock. Each
                             * JoinTask must wait until the filter has
                             * answered each of its queued (bset,e) pairs,
                             * which could be done using a latch.
                             */
                            unsyncBuffer.add(bset);

                            naccepted++;

                            nothingAccepted = false;

                        }

                    }

                    if (DEBUG)
                        log.debug("Accepted element for " + naccepted
                                + " of " + bindingSets.length
                                + " possible bindingSet combinations: "
                                + e.toString() + ", orderIndex="
                                + orderIndex + ", lastJoin=" + lastJoin
                                + ", rule=" + rule.getName());

                }

                // if something was accepted in the chunk, return true.
                return nothingAccepted ? Boolean.FALSE : Boolean.TRUE;

            } catch (Throwable t) {

                halt(t);

                throw new RuntimeException(t);

            }

        }

    }

}
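/*
 * Appendix (illustrative only, not part of the class): the essential logic
 * of the vectored pipeline join implemented by AccessPathTask and ChunkTask
 * above, reduced to a nested loop. The loop bodies shown are simplified
 * from the actual code in ChunkTask#call(); the collection names are
 * hypothetical.
 *
 *     for (Object e : chunkFromAccessPath) {          // one ChunkTask
 *         for (IBindingSet bset : sourceBindingSets) {
 *             IBindingSet copy = bset.clone();
 *             if (joinNexus.bind(rule, tailIndex, e, copy)) {
 *                 // emit, keeping only the variables required downstream.
 *                 unsyncBuffer.add(copy.copy(requiredVars[tailIndex]));
 *             }
 *         }
 *     }
 */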