/** Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on Dec 23, 2013 */ package com.bigdata.rdf.sparql.ast.eval; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; import org.apache.log4j.Logger; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpEvaluationContext; import com.bigdata.bop.BOpIdFactory; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.BadBOpIdTypeException; import com.bigdata.bop.Constant; import com.bigdata.bop.DuplicateBOpIdException; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstraint; import com.bigdata.bop.IPredicate; import com.bigdata.bop.IVariable; import com.bigdata.bop.NV; import com.bigdata.bop.NoBOpIdException; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.Var; import com.bigdata.bop.ap.Predicate; import com.bigdata.bop.ap.SampleIndex.SampleType; import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.engine.QueryEngine; import com.bigdata.bop.join.JoinAnnotations; import com.bigdata.bop.join.PipelineJoin; import com.bigdata.bop.join.PipelineJoinStats; import com.bigdata.bop.joinGraph.PartitionedJoinGroup; import com.bigdata.bop.joinGraph.rto.EdgeSample; import com.bigdata.bop.joinGraph.rto.EstimateEnum; import com.bigdata.bop.joinGraph.rto.JGraph; import com.bigdata.bop.joinGraph.rto.JoinGraph; import com.bigdata.bop.joinGraph.rto.Path; import com.bigdata.bop.joinGraph.rto.SampleBase; import com.bigdata.bop.joinGraph.rto.VertexSample; import com.bigdata.bop.rdf.join.ChunkedMaterializationOp; import com.bigdata.bop.rdf.join.DataSetJoin; import com.bigdata.bop.solutions.SliceOp; import com.bigdata.journal.IIndexManager; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.impl.literal.XSDNumericIV; import com.bigdata.rdf.sparql.ast.ASTContainer; import com.bigdata.rdf.sparql.ast.IGroupMemberNode; import com.bigdata.rdf.sparql.ast.JoinGroupNode; import com.bigdata.rdf.sparql.ast.QueryHints; import com.bigdata.rdf.sparql.ast.StatementPatternNode; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.striterator.Dechunkerator; import com.bigdata.util.NT; /** * Integration with the Runtime Optimizer (RTO). * <p> * Note: The RTO currently uses bottom-up evaluation to solve the join graph and * generate a sub-query plan with an optimized join ordering. It uses * left-to-right evaluation to pass pipeline solutions through the optimized * subquery. 
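 * <p>
 * Note: The RTO may be requested for a specific query using a query hint. The
 * following is a minimal sketch (the prefixes and triple patterns are
 * illustrative only; at least three statement patterns are required before
 * the RTO is applied, see RTO_MIN_JOINS below):
 * 
 * <pre>
 * SELECT ?x ?o1 ?o2 {
 *   # Request the runtime query optimizer.
 *   hint:Query hint:optimizer "Runtime" .
 *   ?x rdf:type foaf:Person .
 *   ?x foaf:knows ?o1 .
 *   ?o1 foaf:knows ?o2 .
 * }
 * </pre>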
 *
 * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/64">Runtime
 *      Query Optimization</a>
 *
 * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/258">Integrate
 *      RTO into SAIL</a>
 * @see <a
 *      href="http://www-db.informatik.uni-tuebingen.de/files/research/pathfinder/publications/rox-demo.pdf">
 *      ROX </a>
 * @see JoinGraph
 * @see JGraph
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public class AST2BOpRTO extends AST2BOpJoins {

    public static final transient Logger log = Logger
            .getLogger(AST2BOpRTO.class);

    public interface Annotations extends AST2BOpJoins.Annotations {

        /**
         * Annotation is used to tag the {@link StatementPatternNode}s in a
         * {@link JoinGroupNode} with the identifier assigned to the
         * corresponding {@link IPredicate}. This makes it possible to lookup
         * the SPO from the {@link IPredicate} when the RTO hands us back an
         * ordered join path.
         */
        String PREDICATE_ID = "PredicateId";

    }

    /**
     * The RTO is most useful for queries with high latency and a large number
     * of joins. You do not need to apply the RTO if there is only a single
     * triple pattern since there are no joins. You do not need to apply the
     * RTO if there are two triple patterns since we will run the most
     * selective one first. The RTO only starts to be useful when there are at
     * least three triple patterns - that gives you three vertices and two
     * joins.
     * 
     * TODO For incremental query construction UIs, it would be useful to run
     * just the RTO and to run it with even a single join. This will give us
     * sample values as well as estimated cardinalities. If the UI has triple
     * patterns that do not join (yet), then those should be grouped. This
     * could be done if we provided a simple SPARQL query with a query hint to
     * run the RTO, trapped the {@link IRunningQuery} to get the detailed RTO
     * results, and specified the minimum threshold for running the RTO for
     * that query as TWO joins, basically just overriding this field.
     */
    static private final int RTO_MIN_JOINS = 3;

    /**
     * When <code>true</code>, the RTO will only accept simple joins into the
     * join graph. Simple joins include triples-mode joins and filters that do
     * not have materialization requirements. Non-simple joins include
     * quads-mode joins and filters that have materialization contexts.
     */
    static private final boolean onlySimpleJoins = false;

    /**
     * When <code>true</code>, the RTO will not accept OPTIONAL joins into the
     * join graph. Optional joins can not decrease the intermediate
     * cardinality since they can not eliminate solutions, but they can
     * increase the intermediate cardinality by finding multiple bindings in
     * the OPTIONAL join. Therefore, it makes sense to execute the OPTIONAL
     * joins in an order that defers as long as possible any increase in the
     * intermediate cardinality. This is very much the same as ordering
     * required joins, but the OPTIONAL joins need to be handled AFTER the
     * required joins.
     * 
     * FIXME RTO: OPTIONALS: Handle optional SPs in joinGraph by ordering them
     * in the tail so as to minimize the cost function. Once implemented, we
     * can drop this field.
     */
    static private final boolean onlyRequiredJoins = true;

    /**
     * When <code>true</code>, the RTO will only accept statement patterns
     * into the join graph. When <code>false</code>, it will attempt to handle
     * non-SP joins, e.g., sub-query, exists, property paths, etc.
     */
    static private final boolean onlySPs = true;

    /**
     * When <code>true</code>, even simple JOINs will run through the code path
     * for the evaluation of complex joins.
     * <p>
     * Note: The complex join evaluation code is written to also allow
     * evaluation of a simple JOIN. This makes it easier to test this code path
     * by testing it against a single {@link PipelineJoin} and comparing with
     * the historical behavior for cutoff evaluation of the
     * {@link PipelineJoin}. However, that only works if you also disable the
     * reordering of access paths in the JOIN.
     * <p>
     * Note: access path reordering is allowed for simple JOINs when we use
     * {@link #runSimpleJoin(QueryEngine, SampleBase, int, PipelineJoin)}.
     * 
     * @see PipelineJoin.Annotations#REORDER_ACCESS_PATHS
     * @see AST2BOpJoins
     * @see #runSimpleJoin(QueryEngine, SampleBase, int, PipelineJoin)
     * @see #runComplexJoin(QueryEngine, SampleBase, int, PipelineOp)
     * 
     *      TODO RTO: Measure performance when using the complex join code path
     *      as opposed to the simple join code path. If there is a performance
     *      win for the simple join code path, then set this to false in
     *      committed code. If not, then we might as well run everything in the
     *      same fashion.
     *      <p>
     *      Note: The simple and complex cutoff join code do currently differ
     *      somewhat in the end result that they produce. This can be observed
     *      in BSBM Q1 on the pc100 data set and the BAR query.
     */
    static final boolean runAllJoinsAsComplexJoins = false;

    /**
     * When <code>true</code>, out of order evaluation will cause the RTO to
     * fail. When <code>false</code>, out of order evaluation is silently
     * ignored.
     * <p>
     * Out of order evaluation makes it impossible to accurately determine the
     * estimated cardinality of the join since we can not compute the join hit
     * ratio without knowing the #of solutions in required to produce a given
     * #of solutions out.
     * <p>
     * There are several possible root causes for out of order evaluation. One
     * is reordering of access paths in the pipeline join. Another is
     * reordering of solutions when they are output from an operator. Another
     * is an operator which uses the altSink and thus does not always route
     * solutions along a single path.
     * 
     * @see #checkQueryPlans
     */
    static final private boolean failOutOfOrderEvaluation = true;

    /**
     * When <code>true</code>, the generated query plans for cutoff evaluation
     * will be checked to verify that the query plans do not permit reordering
     * of solutions.
     * 
     * @see #failOutOfOrderEvaluation
     */
    static final private boolean checkQueryPlans = false; // Note: Make [false] in committed code!

    /**
     * Inspect the remainder of the join group. If we can isolate a join graph
     * and filters, then we will push them down into an RTO JoinGroup. Since
     * the joins have already been ordered by the static optimizer, we can
     * accept them in sequence along with any attachable filters and (if we get
     * at least 3 predicates) push them down into an RTO join group.
     * <p>
     * Note: Two predicates in a join group are not enough for the RTO to
     * provide a different join ordering. Both the static optimizer and the
     * RTO will always choose the AP with the smaller cardinality to run
     * first. If there are only 2 predicates, then the other predicate will
     * run second. You need at least three predicates before the RTO could
     * provide a different answer.
     */
    static protected PipelineOp convertRTOJoinGraph(PipelineOp left,
            final JoinGroupNode joinGroup, final Set<IVariable<?>> doneSet,
            final AST2BOpContext ctx, final AtomicInteger start) {

        /*
         * Snapshot of the doneSet on entry. This gets passed into the RTO.
         */
        final Set<IVariable<?>> doneSetIn = Collections
                .unmodifiableSet(doneSet);

        if (onlySimpleJoins && ctx.isQuads()) {

            return left;

        }

        /*
         * Consider the join group. See if it is complex enough to warrant
         * running the RTO.
         * 
         * TODO Can we also make a decision based on whether there is
         * uncertainty about the cardinality estimates for the predicates in
         * the join graph, e.g., a wide swing in those estimates? This could
         * give us a means to avoid using the RTO if the join graph is known
         * to run quickly or the ordering of the joins generated by the static
         * query optimizer is known to be good.
         * 
         * TODO The static optimizer could simply annotate join groups for
         * which it recognizes that it could have a bad join plan.
         */
        final int arity = joinGroup.arity();

        /*
         * Create a JoinGroup just for the pieces that the RTO will handle.
         * 
         * TODO These joins are CLONED right now since they also exist in the
         * caller's joinGroup. Eventually, this code should be moved into an
         * AST rewrite and we can then destructively move the children from
         * the original JoinGroupNode into a JoinGraphNode that will be the
         * AST side of the RTO.
         */
        final JoinGroupNode rtoJoinGroup = new JoinGroupNode();

        rtoJoinGroup.setQueryHints(joinGroup.getQueryHints());

//        final Set<StatementPatternNode> sps = new LinkedHashSet<StatementPatternNode>();
        // The predicates for the join graph.
        @SuppressWarnings("rawtypes")
        final LinkedList<Predicate> preds = new LinkedList<Predicate>();
        // The constraints for the join graph.
        final List<IConstraint> constraints = new LinkedList<IConstraint>();
        // The #of JOINs accepted into the RTO's join group.
        int naccepted = 0;
        {
            /*
             * The [doneSet] will be modified iff we actually accept JOINs
             * into the RTO. Therefore, we also make a temporary copy that we
             * will use to avoid side-effects if this join group is not
             * complex enough to run the RTO.
             */
            final Set<IVariable<?>> doneSetTmp = new LinkedHashSet<IVariable<?>>(
                    doneSet);

            // Examine the remaining joins, stopping at the first non-SP.
            for (int i = start.get(); i < arity; i++) {

                final IGroupMemberNode child = (IGroupMemberNode) joinGroup
                        .get(i);

                if (child instanceof StatementPatternNode) {
                    // SP
                    StatementPatternNode sp = (StatementPatternNode) child;

                    final boolean optional = sp.isOptional();

                    if (onlyRequiredJoins && optional) {
                        /* Do not include the OPTIONAL join in the join graph. */
                        break;
                    }

                    final List<IConstraint> attachedConstraints = getJoinConstraints(sp);

                    @SuppressWarnings("rawtypes")
                    final Map<IConstraint, Set<IVariable<IV>>> needsMaterialization = new LinkedHashMap<IConstraint, Set<IVariable<IV>>>();

                    getJoinConstraints(attachedConstraints,
                            needsMaterialization);

                    if (onlySimpleJoins && !needsMaterialization.isEmpty()) {
                        /*
                         * At least one variable requires (or might require)
                         * materialization.
                         */
                        break;
                    }

                    // Something the RTO can handle.
                    sp = (StatementPatternNode) sp.clone();// TODO Use destructive move.
                    rtoJoinGroup.addChild(sp); // add to group.
                    naccepted++;
                    /*
                     * Note: This assigns ids to the predicates as a
                     * side-effect. Those ids are assigned by the
                     * AST2BOpContext's BOpIdFactory. You can not rely on
                     * specific ID values being assigned, so you need to build
                     * a map and track the correspondence between the SPs and
                     * the predicates.
                     */
                    final Predicate<?> pred = AST2BOpUtility.toPredicate(sp,
                            ctx);
                    preds.add(pred);
                    // tag the SP with the predicate's ID.
                    sp.setProperty(Annotations.PREDICATE_ID, pred.getId());
                    if (attachedConstraints != null) {
                        /*
                         * The RTO will figure out where to attach these
                         * constraints.
*/ constraints.addAll(attachedConstraints); } } else { // Non-SP. if (onlySPs) break; /* * TODO Handle non-SPs in the RTO. See convertJoinGroup() * for how we handle non-SPs during normal query plan * conversion. All of that would also have to be handled * here for the RTO to allow in non-SPs. */ throw new UnsupportedOperationException(); } } if (naccepted < RTO_MIN_JOINS) { // There are not enough joins for the RTO. return left; } /* * Since we will run the RTO, we now record any variables that are * known to be materialized in order to support the FILTERs * associated with the join group that we feed into the RTO. */ doneSet.addAll(doneSetTmp); } final SampleType sampleType = joinGroup.getProperty( QueryHints.RTO_SAMPLE_TYPE, QueryHints.DEFAULT_RTO_SAMPLE_TYPE); final int limit = joinGroup.getProperty(QueryHints.RTO_LIMIT, QueryHints.DEFAULT_RTO_LIMIT); final int nedges = joinGroup.getProperty(QueryHints.RTO_NEDGES, QueryHints.DEFAULT_RTO_NEDGES); left = new JoinGraph(leftOrEmpty(left),// new NV(BOp.Annotations.BOP_ID, ctx.nextId()),// new NV(BOp.Annotations.EVALUATION_CONTEXT, BOpEvaluationContext.CONTROLLER),// new NV(BOp.Annotations.CONTROLLER, true),// Drop "CONTROLLER" annotation? // new NV(PipelineOp.Annotations.MAX_PARALLEL, 1),// // new NV(PipelineOp.Annotations.LAST_PASS, true),// required // new NV(JoinGraph.Annotations.SELECTED, selectVars // .toArray(new IVariable[selectVars.size()])),// new NV(JoinGraph.Annotations.VERTICES, preds.toArray(new Predicate[preds.size()])),// new NV(JoinGraph.Annotations.CONSTRAINTS, constraints .toArray(new IConstraint[constraints.size()])),// new NV(JoinGraph.Annotations.JOIN_GROUP, rtoJoinGroup),// new NV(JoinGraph.Annotations.LIMIT, limit),// new NV(JoinGraph.Annotations.NEDGES, nedges),// new NV(JoinGraph.Annotations.SAMPLE_TYPE, sampleType.name()),// new NV(JoinGraph.Annotations.DONE_SET, doneSetIn),// new NV(JoinGraph.Annotations.NT, new NT(ctx.getNamespace(), ctx.getTimestamp()))// ); // These joins were consumed. start.addAndGet(naccepted); return left; } /** * Compile a join graph into a query plan. * * @param queryEngine * The {@link QueryEngine} on which the RTO has been executing * and on which the returned query plan may be executed. * @param joinGraph * The operator that executed the RTO. * @param path * The join path that was selected for full execution by the RTO * based on deep sampling of the join graph. * * @return The query plan to fully execute that join graph. * * FIXME RTO: Modify this to use AST2BOpUtility#convertJoinGroup(). * We would have to reorder the joins in the * {@link JoinGraph.Annotations#JOIN_GROUP} into the ordering * specified in the {@link Path} and make sure that * convertJoinGroup() did not attempt to recursively reapply the * RTO. This will get rid of one of the few remaining uses of * {@link PartitionedJoinGroup}. */ public static PipelineOp compileJoinGraph(final QueryEngine queryEngine, final JoinGraph joinGraph, final Path path) { if (queryEngine == null) throw new IllegalArgumentException(); if (joinGraph == null) throw new IllegalArgumentException(); if (path == null) throw new IllegalArgumentException(); // final IVariable<?>[] selected = joinGraph.getSelected(); final IPredicate<?>[] predicates = path.getPredicates(); final IConstraint[] constraints = joinGraph.getConstraints(); final Set<IVariable<?>> doneSet = new LinkedHashSet<IVariable<?>>( joinGraph.getDoneSet()); /* * The AST JoinGroupNode for the joins and filters that we are running * through the RTO. 
*/ final JoinGroupNode rtoJoinGroup = (JoinGroupNode) joinGraph .getRequiredProperty(JoinGraph.Annotations.JOIN_GROUP); // Build an index over the bopIds in that JoinGroupNode. final Map<Integer, StatementPatternNode> index = getIndex(rtoJoinGroup); // Factory avoids reuse of bopIds assigned to the predicates. final BOpIdFactory idFactory = new BOpIdFactory(); // Reserve ids used by the vertices of the join graph. idFactory.reserveIds(predicates); // Reserve ids used by the constraints on the join graph. idFactory.reserveIds(constraints); /* * Figure out which constraints are attached to which predicates. */ final IConstraint[][] constraintAttachmentArray = PartitionedJoinGroup .getJoinGraphConstraints(predicates, constraints, null/* knownBound */, true/* pathIsComplete */); // Create an execution context for the query. final AST2BOpContext ctx = getExecutionContext(queryEngine, // Identifies the KB instance (namespace and timestamp). (NT) joinGraph.getRequiredProperty(JoinGraph.Annotations.NT)); // Start with an empty plan. PipelineOp left = null; for (int i = 0; i < predicates.length; i++) { final Predicate<?> pred = (Predicate<?>) predicates[i]; final IConstraint[] attachedJoinConstraints = constraintAttachmentArray[i]; final boolean optional = pred.isOptional(); // Lookup the AST node for that predicate. final StatementPatternNode sp = index.get(pred.getId()); left = join(left, // pred, // optional ? new LinkedHashSet<IVariable<?>>(doneSet) : doneSet, // attachedJoinConstraints == null ? null : Arrays .asList(attachedJoinConstraints),// null, // cutoff join limit sp.getQueryHints(),// ctx); } // if (selected != null && selected.length != 0) { // // // Drop variables that are not projected out. // left = applyQueryHints(new ProjectionOp(// // leftOrEmpty(left), // // new NV(ProjectionOp.Annotations.BOP_ID, idFactory.nextId()),// // new NV(ProjectionOp.Annotations.SELECT, selected)// // ), rtoJoinGroup, ctx); // // } return left; } /** * Return an execution context that may be used to execute a cutoff join * during sampling or the entire join path once it has been identified. * * @param queryEngine * The query engine on which the RTO is executing. * @param nt * The namespace and timestamp of the KB view against which the * RTO is running. * * @throws RuntimeException * if there is no such KB instance. */ private static AST2BOpContext getExecutionContext( final QueryEngine queryEngine, final NT nt) { // The index manager that can be used to resolve the KB view. final IIndexManager indexManager = queryEngine.getFederation() == null ? queryEngine .getIndexManager() : queryEngine.getFederation(); // Resolve the KB instance. final AbstractTripleStore db = (AbstractTripleStore) indexManager .getResourceLocator().locate(nt.getName(), nt.getTimestamp()); if (db == null) throw new RuntimeException("No such KB? " + nt); /* * An empty container. You can not use any of the top-level SPARQL query * conversion routines with this container, but it is enough for the * low-level things that we need to run the RTO. */ final ASTContainer astContainer = new ASTContainer(BOp.NOARGS, BOp.NOANNS); return new AST2BOpContext(astContainer, db); } /** * Return a map from the {@link Annotations#PREDICATE_ID} to the * corresponding {@link StatementPatternNode}. * * @param op * The join group. * * @return The index, which is immutable and thread-safe. * * @throws DuplicateBOpIdException * if there are two or more {@link BOp}s having the same * {@link Annotations#PREDICATE_ID}. 
* @throws BadBOpIdTypeException * if the {@link Annotations#PREDICATE_ID} is not an * {@link Integer}. * @throws NoBOpIdException * if a {@link StatementPatternNode} does not have a * {@link Annotations#PREDICATE_ID}. */ static private Map<Integer, StatementPatternNode> getIndex( final JoinGroupNode op) { if (op == null) throw new IllegalArgumentException(); final LinkedHashMap<Integer, StatementPatternNode> map = new LinkedHashMap<Integer, StatementPatternNode>(); final Iterator<IGroupMemberNode> itr = op.iterator(); while (itr.hasNext()) { final BOp t = itr.next(); if(!(t instanceof StatementPatternNode)) { // Skip non-SP nodes. continue; } final StatementPatternNode sp = (StatementPatternNode) t; final Object x = t.getProperty(Annotations.PREDICATE_ID); if (x == null) { throw new NoBOpIdException(t.toString()); } if (!(x instanceof Integer)) { throw new BadBOpIdTypeException("Must be Integer, not: " + x.getClass() + ": " + Annotations.PREDICATE_ID); } final Integer id = (Integer) x; final BOp conflict = map.put(id, sp); if (conflict != null) { /* * BOp appears more than once. This is not allowed for * pipeline operators. If you are getting this exception for * a non-pipeline operator, you should remove the bopId. */ throw new DuplicateBOpIdException("duplicate id=" + id + " for " + conflict + " and " + t); } } // wrap to ensure immutable and thread-safe. return Collections.unmodifiableMap(map); } /** * Cutoff join of the last vertex in the join path. * <p> * <strong>The caller is responsible for protecting against needless * re-sampling.</strong> This includes cases where a sample already exists * at the desired sample limit and cases where the sample is already exact. * * @param queryEngine * The query engine. * @param joinGraph * The pipeline operator that is executing the RTO. This defines * the join graph (vertices, edges, and constraints) and also * provides access to the AST and related metadata required to * execute the join graph. * @param limit * The limit for the cutoff join. * @param predicates * The path segment, which must include the target vertex as the * last component of the path segment. * @param constraints * The constraints declared for the join graph (if any). The * appropriate constraints will be applied based on the variables * which are known to be bound as of the cutoff join for the last * vertex in the path segment. * @param pathIsComplete * <code>true</code> iff all vertices in the join graph are * incorporated into this path. * @param sourceSample * The input sample for the cutoff join. When this is a one-step * estimation of the cardinality of the edge, then this sample is * taken from the {@link VertexSample}. When the edge (vSource, * vTarget) extends some {@link Path}, then this is taken from * the {@link EdgeSample} for that {@link Path}. * * @return The result of sampling that edge. */ //synchronized// FIXME REMOVE synchronized. This forces single threading for debugging purposes when chasing out-of-order evaluation exceptions. static public EdgeSample cutoffJoin(// final QueryEngine queryEngine,// final JoinGraph joinGraph,// final int limit,// final IPredicate<?>[] predicates,// final IConstraint[] constraints,// final boolean pathIsComplete,// final SampleBase sourceSample// ) throws Exception { if (predicates == null) throw new IllegalArgumentException(); if (limit <= 0) throw new IllegalArgumentException(); // The access path on which the cutoff join will read. 
        final IPredicate<?> pred = predicates[predicates.length - 1];

        if (pred == null)
            throw new IllegalArgumentException();

        if (sourceSample == null)
            throw new IllegalArgumentException();

        if (sourceSample.getSample() == null)
            throw new IllegalArgumentException();

        PipelineOp query = null;
        try {

            /*
             * Generate the query plan for cutoff evaluation of that JOIN. The
             * complexity of the query plan depends on whether or not there
             * are FILTERs "attached" to the join that have variable
             * materialization requirements.
             */
            query = getCutoffJoinQuery(queryEngine, joinGraph, limit,
                    predicates, constraints, pathIsComplete, sourceSample);

            if (!runAllJoinsAsComplexJoins && (query instanceof PipelineJoin)
                    && query.arity() == 0) {

                /*
                 * Simple JOIN.
                 * 
                 * Old logic for query execution. Relies on the ability of the
                 * PipelineJoin to perform cutoff evaluation of the join
                 * without reordering the solutions.
                 */

                return runSimpleJoin(queryEngine, sourceSample, limit,
                        (PipelineJoin<?>) query);

            } else {

                /*
                 * Complex JOIN involving variable materialization, conditional
                 * routing operators, filters, and a SLICE to limit the output.
                 */

                return runComplexJoin(queryEngine, sourceSample, limit, query);

            }

        } catch (Throwable ex) {

            /*
             * Add some more information. This gives us the specific predicate.
             * However, it does not tell us the constraints that were attached
             * to that predicate. That information is only available when we
             * are compiling the query plan. At this point, the constraints
             * have been turned into query plan operators.
             */
            throw new RuntimeException("cause=" + ex + "\npred="
                    + BOpUtility.toString(pred) + "\nconstraints="
                    + Arrays.toString(constraints)
                    + (query == null ? "" : "\nquery="
                            + BOpUtility.toString(query)), ex);

        }

    }

    /**
     * Implementation handles the general case. The general case can result in
     * a physical query plan consisting of multiple operators, including
     * variable materialization requirements, JOINs with remote access paths,
     * etc. The logic for complex joins is handled by {@link AST2BOpJoins} and
     * {@link AST2BOpFilters}.
     * <p>
     * In the general case, the generated physical query plan has multiple
     * operators and needs to be submitted to the {@link QueryEngine} for
     * evaluation. In order to respect the semantics of cutoff evaluation, the
     * first operator must use an index, e.g., a {@link PipelineJoin}. The
     * remaining operators should not increase the cardinality, e.g., a FILTER
     * never increases the cardinality of the JOIN to which it is attached,
     * but there can be hidden JOINs, notably the
     * {@link ChunkedMaterializationOp} which is basically a JOIN against the
     * dictionary indices.
     * 
     * @see AST2BOpJoins#join(PipelineOp, Predicate, Set, Collection,
     *      Properties, AST2BOpContext)
     */
    private static PipelineOp getCutoffJoinQuery(//
            final QueryEngine queryEngine,//
            final JoinGraph joinGraph, //
            final int limit,//
            final IPredicate<?>[] predicates,//
            final IConstraint[] constraints,//
            final boolean pathIsComplete,//
            final SampleBase sourceSample//
    ) throws Exception {

        // Note: Arguments are checked by caller : cutoffJoin().

        /*
         * Create an execution context for the query.
         */
        final AST2BOpContext ctx = getExecutionContext(queryEngine,
                // Identifies the KB instance (namespace and timestamp).
                (NT) joinGraph.getRequiredProperty(JoinGraph.Annotations.NT));

        // Figure out which constraints attach to each predicate.
        // TODO RTO: Replace with StaticAnalysis.
        final IConstraint[][] constraintAttachmentArray = PartitionedJoinGroup
                .getJoinGraphConstraints(predicates, constraints,
                        null/* knownBound */, pathIsComplete);

        // The constraint(s) (if any) for this join.
        final IConstraint[] c = constraintAttachmentArray[predicates.length - 1];

        // The access path on which the cutoff join will read.
        @SuppressWarnings("rawtypes")
        final Predicate pred = (Predicate) predicates[predicates.length - 1];

        // The constraints attached to that join.
        final IConstraint[] attachedJoinConstraints = constraintAttachmentArray[predicates.length - 1];

        /*
         * The AST JoinGroupNode for the joins and filters that we are running
         * through the RTO.
         */
        final JoinGroupNode rtoJoinGroup = (JoinGroupNode) joinGraph
                .getRequiredProperty(JoinGraph.Annotations.JOIN_GROUP);

        // Build an index over the bopIds in that JoinGroupNode.
        final Map<Integer, StatementPatternNode> index = getIndex(rtoJoinGroup);

        // Lookup the AST node for that predicate.
        final StatementPatternNode sp = index.get(pred.getId());

        /*
         * Setup factory for bopIds with reservations for ones already in use.
         */
        final BOpIdFactory idFactory = new BOpIdFactory();

        // Reservation for the bopId used by the predicate.
        idFactory.reserve(pred.getId());

        // Reservations for the bopIds used by the constraints.
        idFactory.reserveIds(c);

        final boolean optional = pred.isOptional();

        /*
         * FIXME RTO: doneSet: We should also include in the doneSet anything
         * that is known to have been materialized based on an analysis of the
         * join path (as executed) up to this point in the path. This will let
         * us potentially do less work. This will require tracking the doneSet
         * in the Path and passing the Path into cutoffJoin(). The simplest
         * way to manage this is to just annotate the Path as we go, which
         * means making the Path more AST aware - or at least doneSet aware.
         * Or we can just apply the analysis to each step in the path to
         * figure out what is done each time we setup cutoff evaluation of an
         * operator.
         */
        final Set<IVariable<?>> doneSet = new LinkedHashSet<IVariable<?>>(
                joinGraph.getDoneSet());

        // Start with an empty plan.
        PipelineOp left = null;

        /*
         * Add the JOIN to the query plan.
         * 
         * Note: The generated query plan will potentially include variable
         * materialization to support constraints that can not be evaluated
         * directly by the JOIN (because they depend on variable bindings that
         * are not known to be materialized.)
         */
        left = join(left, //
                pred, //
                optional ? new LinkedHashSet<IVariable<?>>(doneSet) : doneSet, //
                attachedJoinConstraints == null ? null : Arrays
                        .asList(attachedJoinConstraints),//
                Long.valueOf(limit), // cutoff join limit.
                sp.getQueryHints(),//
                ctx);

        /*
         * [left] is now the last operator in the query plan.
         */

        if (!((left instanceof PipelineJoin) && left.arity() == 0)) {

            /*
             * The query plan contains multiple operators.
             */

            /*
             * Add a SLICE to terminate the query when we have [limit]
             * solutions out.
             */
            left = applyQueryHints(new SliceOp(leftOrEmpty(left),//
                    new NV(SliceOp.Annotations.BOP_ID, ctx.nextId()),//
                    new NV(SliceOp.Annotations.OFFSET, 0L),//
                    new NV(SliceOp.Annotations.LIMIT, (long) limit),//
                    new NV(SliceOp.Annotations.EVALUATION_CONTEXT,
                            BOpEvaluationContext.CONTROLLER),//
                    new NV(SliceOp.Annotations.PIPELINED, true),//
                    new NV(SliceOp.Annotations.MAX_PARALLEL, 1),//
                    new NV(SliceOp.Annotations.REORDER_SOLUTIONS, false),//
                    new NV(SliceOp.Annotations.SHARED_STATE, true)//
                    ), rtoJoinGroup, ctx);

        }

        if (log.isDebugEnabled())
            log.debug("RTO cutoff join query::\n" + BOpUtility.toString(left)
                    + "\npred::" + pred);

        if (checkQueryPlans) {
            checkQueryPlan(left);
        }

        return left;

    }

    /**
     * Debug code checks the query plan for patterns that could cause
     * {@link OutOfOrderEvaluationException}s.
     * <p>
     * <ul>
     * <li>This looks for query plans that use the alternate sink. Operators
     * that route some solutions to the default sink and some solutions to the
     * alternate sink can cause out of order evaluation. Out of order
     * evaluation is not compatible with cutoff JOIN evaluation.</li>
     * <li>This looks for operators that do not restrict
     * {@link PipelineOp.Annotations#MAX_PARALLEL} to ONE (1).</li>
     * </ul>
     * 
     * TODO We probably need an interface to determine whether an operator (as
     * configured) guarantees in order evaluation. Some operators can not be
     * configured for in order evaluation. Others, including a hash join using
     * a linked hash map for the buckets, can preserve the order of the source
     * solutions in their output. This would also be useful for creating query
     * plans that are order preserving when an index order corresponds to the
     * desired output order.
     * 
     * TODO Extend this to check the RTO plan for ordered evaluation and then
     * use this in the test suite. The plan must be provably ordered. We could
     * also use this for order preserving query plans for other purposes.
     */
    private static void checkQueryPlan(final PipelineOp left) {

        final Iterator<PipelineOp> itr = BOpUtility.visitAll(left,
                PipelineOp.class);

        while (itr.hasNext()) {

            final PipelineOp tmp = itr.next();

            if (tmp.getProperty(PipelineOp.Annotations.ALT_SINK_REF) != null) {
                // alternative sink is disallowed.
                throw new RuntimeException("Query plan uses altSink: op="
                        + tmp.toShortString());
            }

            if (tmp.getMaxParallel() != 1) {
                // parallel execution of an operator is disallowed.
                throw new RuntimeException("RTO "
                        + PipelineOp.Annotations.MAX_PARALLEL
                        + ": expected=1, actual=" + tmp.getMaxParallel()
                        + ", op=" + tmp.toShortString());
            }

            if (tmp.isReorderSolutions()) {
                // reordering of solutions is disallowed.
                throw new RuntimeException("RTO "
                        + PipelineOp.Annotations.REORDER_SOLUTIONS
                        + ": expected=false, actual="
                        + tmp.isReorderSolutions() + ", op="
                        + tmp.toShortString());
            }

            if (tmp instanceof PipelineJoin) {

                final PipelineJoin<?> t = (PipelineJoin<?>) tmp;

                final int maxParallelChunks = t.getMaxParallelChunks();

                if (maxParallelChunks != 0) {
                    throw new RuntimeException("PipelineJoin: "
                            + PipelineJoin.Annotations.MAX_PARALLEL_CHUNKS
                            + "=" + maxParallelChunks
                            + " but must be ZERO (0):: op="
                            + t.toShortString());
                }

                final boolean coalesceDuplicateAccessPaths = t
                        .getProperty(
                                PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS,
                                PipelineJoin.Annotations.DEFAULT_COALESCE_DUPLICATE_ACCESS_PATHS);

                if (coalesceDuplicateAccessPaths) {
                    throw new RuntimeException(
                            "PipelineJoin: "
                                    + PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS
                                    + "=" + coalesceDuplicateAccessPaths
                                    + " but must be false:: op="
                                    + t.toShortString());
                }

            }

        }

    }

    /**
     * Run a simple cutoff join on the {@link QueryEngine}. This method relies
     * on the ability to control the {@link PipelineJoin} such that it does
     * not reorder the solutions.
     * 
     * @param queryEngine
     *            The {@link QueryEngine}.
     * @param sourceSample
     *            The source sample (from the upstream vertex).
     * @param limit
     *            The cutoff join limit.
     * @param joinOp
     *            A query plan consisting of exactly one join. The join must
     *            support cutoff evaluation, e.g., a {@link PipelineJoin}. The
     *            join MUST be marked with the {@link JoinAnnotations#LIMIT}.
     * 
     * @return The sample from the cutoff join of that edge.
     */
    private static EdgeSample runSimpleJoin(//
            final QueryEngine queryEngine,//
            final SampleBase sourceSample, //
            final int limit,//
            final PipelineJoin<?> joinOp) throws Exception {

        if (log.isInfoEnabled())
            log.info("limit=" + limit + ", sourceSample=" + sourceSample
                    + ", query=" + joinOp.toShortString());

        // The cutoff limit. This annotation MUST exist on the JOIN.
        if (limit != ((Long) joinOp.getRequiredProperty(JoinAnnotations.LIMIT))
                .intValue())
            throw new AssertionError();

        final int joinId = joinOp.getId();

        final PipelineOp queryOp = joinOp;

        // run the cutoff sampling of the edge.
        final UUID queryId = UUID.randomUUID();

        final IRunningQuery runningQuery = queryEngine.eval(//
                queryId,//
                queryOp,//
                null,// attributes
//                new LocalChunkMessage(queryEngine, queryId,
//                        joinOp.getId()/* startId */, -1 /* partitionId */,
//                        sourceSample.getSample())
                sourceSample.getSample()
                );

        final List<IBindingSet> result = new LinkedList<IBindingSet>();
        try {

            int nresults = 0;

            try {

                IBindingSet bset = null;

                // Figure out the #of source samples consumed.
                final Iterator<IBindingSet> itr = new Dechunkerator<IBindingSet>(
                        runningQuery.iterator());

                while (itr.hasNext()) {

                    bset = itr.next();

                    result.add(bset);

                    if (nresults++ >= limit) {
                        // Break out if cutoff join over produces!
                        break;
                    }

                }

            } finally {

                // ensure terminated regardless.
                runningQuery.cancel(true/* mayInterruptIfRunning */);

            }

        } finally {

            // verify no problems.
            if (runningQuery.getCause() != null) {
                // wrap throwable from abnormal termination.
                throw new RuntimeException(runningQuery.getCause());
            }

        }

        // The join hit ratio can be computed directly from these stats.
        final PipelineJoinStats joinStats = (PipelineJoinStats) runningQuery
                .getStats().get(joinId);

        if (log.isTraceEnabled())
            log.trace(//Arrays.toString(BOpUtility.getPredIds(predicates)) + ": "+
                    joinStats.toString());

        // #of solutions in.
        final int inputCount = (int) joinStats.inputSolutions.get();

        // #of solutions out.
        final long outputCount = joinStats.outputSolutions.get();

        // cumulative range count of the sampled access paths.
        final long sumRangeCount = joinStats.accessPathRangeCount.get();

        /*
         * The #of tuples read from the sampled access paths. This is part of
         * the cost of the join path, even though it is not part of the
         * expected cardinality of the cutoff join.
         * 
         * Note: While IOs is a better predictor of latency, it is possible to
         * choose a pipelined join versus a hash join once the query plan has
         * been decided. Their IO profiles are both correlated to the #of
         * tuples read.
         */
        final long tuplesRead = joinStats.accessPathUnitsIn.get();

        return newEdgeSample(limit, inputCount, outputCount, sumRangeCount,
                tuplesRead, sourceSample, result);

    }

    /**
     * Run a complex cutoff join on the {@link QueryEngine}. This approach is
     * suitable for complex query plans. It tracks each solution in by
     * assigning a row identifier to the source solutions. It then observes
     * the solutions out and figures out how many source solutions were
     * required in order to produce the solutions out.
     * <p>
     * Note: This approach relies on the query plan NOT reordering the source
     * solutions.
     * 
     * @param queryEngine
     *            The {@link QueryEngine}.
     * @param sourceSample
     *            The source sample (from the upstream vertex).
     * @param limit
     *            The cutoff join limit.
     * @param query
     *            The query plan for cutoff evaluation of a JOIN with optional
     *            variable materialization requirements and FILTERs.
     * 
     * @return The sample from the cutoff join of that edge.
     * 
     * @throws OutOfOrderEvaluationException
     *             This exception is thrown if reordering of the source
     *             solutions can be observed. This is detected if we observe a
     *             row identifier in the output solutions that is LT a
     *             previously observed row identifier in the output solutions.
     *             If this problem is observed, then you must carefully review
     *             the manner in which the query plan is constructed and the
     *             parallelism in the query plan. Any parallelism or
     *             reordering will trip this error.
     * 
     *             TODO If we hit the {@link OutOfOrderEvaluationException}
     *             for some kinds of access paths, e.g., quads mode access
     *             paths, then we might need to look at the
     *             {@link DataSetJoin} in more depth and the way in which
     *             named graph and default graph joins are being executed for
     *             both local and scale-out deployments. One fall back
     *             position is to feed the input solutions in one at a time in
     *             different running queries. This will give us the exact
     *             output cardinality for a given source solution while
     *             preserving parallel evaluation over some chunk of source
     *             solutions. However, this approach can do too much work and
     *             will incur more overhead than injecting a rowid column into
     *             the source solutions.
     */
    private static EdgeSample runComplexJoin(//
            final QueryEngine queryEngine,//
            final SampleBase sourceSample, //
            final int limit,//
            final PipelineOp query) throws Exception {

        if (log.isInfoEnabled())
            log.info("limit=" + limit + ", sourceSample=" + sourceSample
                    + ", query=" + query.toShortString());

        // Anonymous variable used for the injected column.
        final IVariable<?> rtoVar = Var.var();

        // Inject column of values [1:NSAMPLES].
        final IBindingSet[] in = injectRowIdColumn(rtoVar, 1/* start */,
                sourceSample.getSample());

        // run the cutoff sampling of the edge.
        final UUID queryId = UUID.randomUUID();

        final IRunningQuery runningQuery = queryEngine.eval(//
                queryId,//
                query,//
                null,// attributes
                in// bindingSet[]
//                new LocalChunkMessage(queryEngine, queryId,
//                        joinOp.getId()/* startId */, -1 /* partitionId */,
//                        sourceSample.getSample())
                );

        /*
         * The assumption is that solutions in are converted into solutions
         * out in an order preserving manner. This requires that there is no
         * reordering of the solutions in. If this assumption holds, then we
         * can scan the solutions *out*, counting the #of solutions produced
         * until we reach the limit and tracking the number of solutions *in*
         * required to achieve the limit. The SLICE will cut off the query
         * once the limit is reached.
         * 
         * Note: we check for both overproduction and out of order evaluation
         * for sanity. If either of those assumptions is violated, then that
         * breaks the foundations on which we are attempting to perform the
         * cutoff evaluation of the JOIN.
         */
        final List<IBindingSet> result = new LinkedList<IBindingSet>();
        final int inputCount; // #of solutions in.
        final long outputCount; // #of solutions out.
        try {

            long nout = 0; // #of solutions out.

            try {

                IBindingSet bset = null;

                // Figure out the #of source samples consumed.
                final Iterator<IBindingSet> itr = new Dechunkerator<IBindingSet>(
                        runningQuery.iterator());

                /*
                 * Injected row id variable.
                 * 
                 * Note: Starts at ZERO in case NO solutions out.
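                 * 
                 * Illustrative example (hypothetical row ids): if the output
                 * solutions carry row ids [1, 1, 2, 4], then the join
                 * consumed source solutions up through the fourth one, so
                 * inputCount = lastRowId = 4 when the cutoff is reached. If
                 * the output carries row ids [1, 3, 2], then the solutions
                 * were reordered (2 LT 3) and an OutOfOrderEvaluationException
                 * is thrown below when failOutOfOrderEvaluation is set.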
                 */
                int lastRowId = 0;

                while (itr.hasNext()) {

                    bset = itr.next();

                    final int rowid = ((XSDNumericIV) bset.get(rtoVar).get())
                            .intValue();

                    //log.warn("rowId="+rowid+",lastRowId="+lastRowId+",bset="+bset);

                    if (rowid < lastRowId && failOutOfOrderEvaluation) {

                        /*
                         * Out of order evaluation makes it impossible to
                         * accurately determine the estimated cardinality of
                         * the join since we can not compute the join hit
                         * ratio without knowing the #of solutions in required
                         * to produce a given #of solutions out.
                         */
                        throw new OutOfOrderEvaluationException();

                    }

                    lastRowId = rowid;

                    bset.clear(rtoVar); // drop injected variable.

                    result.add(bset); // accumulate results.

                    if (nout++ >= limit) {
                        // limit is satisfied.
                        break;
                    }

                }

                inputCount = lastRowId;

                outputCount = nout;

            } finally {

                // ensure terminated regardless.
                runningQuery.cancel(true/* mayInterruptIfRunning */);

            }

        } finally {

            // verify no problems.
            if (runningQuery.getCause() != null) {
                // wrap throwable from abnormal termination.
                throw new RuntimeException(runningQuery.getCause());
            }

        }

        // There should be a single JOIN.
        final PipelineJoin<?> joinOp = BOpUtility.getOnly(query,
                PipelineJoin.class);

        /*
         * We need to work with the correlation between the input solutions
         * and the output solutions that flow through the query.
         */

        // Stats for the JOIN.
        final PipelineJoinStats joinStats = (PipelineJoinStats) runningQuery
                .getStats().get(joinOp.getId());

        if (log.isTraceEnabled())
            log.trace(//Arrays.toString(BOpUtility.getPredIds(predicates)) + ": "+
                    "join::" + joinStats);

        // cumulative range count of the sampled access paths.
        final long sumRangeCount = joinStats.accessPathRangeCount.get();

        /*
         * The #of tuples read from the sampled access paths. This is part of
         * the cost of the join path, even though it is not part of the
         * expected cardinality of the cutoff join.
         * 
         * Note: While IOs is a better predictor of latency, it is possible to
         * choose a pipelined join versus a hash join once the query plan has
         * been decided. Their IO profiles are both correlated to the #of
         * tuples read.
         * 
         * TODO Include tuples read for ChunkedMaterializationOp? This does
         * not currently override the newStats() nor track the tuples read. It
         * will need to be modified to do that. This gives us a better measure
         * of the IO cost associated with a JOIN requiring materialization of
         * variable bindings. However, it is only important to make this
         * visible if it is a factor in the relative cost of two path segments
         * or deciding between an index nested join and a hash join against an
         * access path.
         */
        final long tuplesRead = joinStats.accessPathUnitsIn.get();

        return newEdgeSample(limit, inputCount, outputCount, sumRangeCount,
                tuplesRead, sourceSample, result);

    }

    /**
     * Create a new {@link EdgeSample} from the cutoff evaluation of some join.
     * 
     * @param limit
     *            The cutoff evaluation limit.
     * @param inputCount
     *            The number of input solutions consumed to produce the
     *            observed number of solutions out.
     * @param outputCount
     *            The observed number of solutions out.
     * @param sumRangeCount
     *            The sum of the fast range counts for the as-bound invocations
     *            of the JOIN.
     * @param tuplesRead
     *            The #of tuples read from the indices for the JOIN and for
     *            any variable materialization operator(s).
     * @param sourceSample
     *            The input sample to the cutoff join.
     * @param result
     *            The solutions produced by cutoff evaluation of that join.
     * 
     * @return The new {@link EdgeSample}.
     */
    private static EdgeSample newEdgeSample(final int limit,
            final int inputCount, final long outputCount,
            final long sumRangeCount, final long tuplesRead,
            final SampleBase sourceSample, final List<IBindingSet> result) {

        if (log.isInfoEnabled())
            log.info("inputCount=" + inputCount + ", outputCount="
                    + outputCount + ", sumRangeCount=" + sumRangeCount
                    + ", tuplesRead=" + tuplesRead + ", sourceSample="
                    + sourceSample);

        // #of solutions out as adjusted for various edge conditions.
        final long adjustedCard;

        final EstimateEnum estimateEnum;

        if (sourceSample.estimateEnum == EstimateEnum.Exact
                && outputCount < limit) {

            /*
             * Note: If the entire source vertex is being fed into the cutoff
             * join and the cutoff join outputCount is LT the limit, then the
             * sample is the actual result of the join. That is, feeding all
             * source solutions into the join gives fewer than the desired
             * number of output solutions.
             */
            estimateEnum = EstimateEnum.Exact;
            adjustedCard = outputCount;

        } else if (inputCount == 1 && outputCount == limit) {

            /*
             * If the inputCount is ONE (1) and the outputCount is the limit,
             * then the estimated cardinality is a lower bound as more than
             * outputCount solutions might be produced by the join when
             * presented with a single input solution.
             * 
             * However, this condition suggests that the sum of the sampled
             * range counts is a much better estimate of the cardinality of
             * this join.
             * 
             * For example, consider a join feeding a rangeCount of 16 into a
             * rangeCount of 175000. With a limit of 100, we estimated the
             * cardinality at 1600L (lower bound). In fact, the cardinality is
             * 16*175000. This falsely low estimate can cause join paths which
             * are actually better to be dropped.
             */

            // replace outputCount with the sum of the sampled range counts.
            adjustedCard = sumRangeCount;

            estimateEnum = EstimateEnum.LowerBound;

        } else if ((sourceSample.estimateEnum != EstimateEnum.Exact)
                /*
                 * && inputCount == Math.min(sourceSample.limit,
                 * sourceSample.estimatedCardinality)
                 */
                && outputCount == 0) {

            /*
             * When the source sample was not exact, the inputCount is EQ to
             * the lesser of the source range count and the source sample
             * limit, and the outputCount is ZERO (0), then feeding in all
             * source solutions is not sufficient to generate any output
             * solutions. In this case, the estimated join hit ratio appears
             * to be zero. However, the estimation of the join hit ratio
             * actually underflowed and the real join hit ratio might be a
             * small non-negative value. A real zero can only be identified by
             * executing the full join.
             * 
             * Note: An apparent join hit ratio of zero does NOT imply that
             * the join will be empty (unless the source vertex sample is
             * actually the fully materialized access path - this case is
             * covered above).
             * 
             * path  sourceCard  *    f (  in  read  out  limit  adjCard) = estCard : sumEstCard  joinPath
             *   15       4800L  * 0.00 ( 200   200    0    300        0) =       0 :       3633  [ 3 1 6 5 ]
             */
            estimateEnum = EstimateEnum.Underflow;

            adjustedCard = outputCount;

        } else {

            estimateEnum = EstimateEnum.Normal;

            adjustedCard = outputCount;

        }

        /*
         * Compute the join hit ratio based on the adjusted cardinality
         * estimate.
         */
        final double f = adjustedCard == 0 ? 0
                : (adjustedCard / (double) inputCount);
//        final double f = outputCount == 0 ? 0
//                : (outputCount / (double) inputCount);

        // estimated output cardinality of fully executed operator.
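        /*
         * Worked example (illustrative numbers, not from an actual run): if
         * the cutoff join consumed inputCount=10 source solutions and
         * produced adjustedCard=50 solutions, then f = 50/10 = 5.0. With
         * sourceSample.estCard=1000, the estimated output cardinality of the
         * fully executed join is 1000 * 5.0 = 5000, and with
         * sumRangeCount=400 the estimated tuples read (estRead, below) would
         * be 400 * 5.0 = 2000.
         */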
final long estCard = (long) (sourceSample.estCard * f); /* * estimated tuples read for fully executed operator * * TODO RTO: estRead: The actual IOs depend on the join type (hash join * versus pipeline join) and whether or not the file has index order * (segment versus journal). A hash join will read once on the AP. A * pipeline join will read once per input solution. A key-range read on * a segment uses multi-block IO while a key-range read on a journal * uses random IO. Also, remote access path reads are more expensive * than sharded or hash partitioned access path reads in scale-out. */ final long estRead = (long) (sumRangeCount * f); final EdgeSample edgeSample = new EdgeSample(// sourceSample,// inputCount,// tuplesRead,// sumRangeCount,// outputCount, // adjustedCard,// f, // // args to SampleBase estCard, // estimated output cardinality if fully executed. estRead, // estimated tuples read if fully executed. limit, // estimateEnum,// result.toArray(new IBindingSet[result.size()])); if (log.isDebugEnabled()) log.debug(//Arrays.toString(BOpUtility.getPredIds(predicates)) "newSample=" + edgeSample); return edgeSample; } /** * Inject (or replace) an {@link Integer} "rowId" column. This does not have * a side-effect on the source {@link IBindingSet}s. * * @param var * The name of the column. * @param start * The starting value for the identifier. * @param in * The source {@link IBindingSet}s. * * @return The modified {@link IBindingSet}s. */ private static IBindingSet[] injectRowIdColumn(final IVariable<?> var, final int start, final IBindingSet[] in) { if (in == null) throw new IllegalArgumentException(); final IBindingSet[] out = new IBindingSet[in.length]; for (int i = 0; i < out.length; i++) { final IBindingSet bset = in[i].clone(); bset.set(var, new Constant<IV>(new XSDNumericIV(start + i))); out[i] = bset; } return out; } }
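
/*
 * Note: An illustrative example for injectRowIdColumn() (hypothetical data):
 * given source solutions { (x=:a), (x=:b) }, var=v, and start=1, the returned
 * array is [ (x=:a, v=1), (x=:b, v=2) ]. Each source binding set is cloned
 * before the column is injected, so the caller's solutions are not modified.
 */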