// Path.java example
//
// Explorer
// blazegraph-master
// - database-master
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package com.bigdata.bop.joinGraph.rto;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpUtility;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpRTO;

/**
 * A join path is an ordered sequence of N {@link Vertex vertices} and
 * represents an ordered series of N-1 joins.
 * <p>
 * During exploration, the {@link Path} is used to develop an estimate of the
 * cost of different join paths which explore the {@link Vertex vertices} in a
 * {@link JGraph join graph}, possibly under some set of {@link IConstraint}s.
 * The estimated cost of the join path is developed from a sample of the initial
 * {@link Vertex} followed by the cutoff sample of each join in the join path.
 * Join paths may be re-sampled in successive rounds at a greater sample size in
 * order to improve the accuracy and robustness of the estimated cost for the
 * join path.
 * <p>
 * Each join path reflects a specific history. The cutoff sample for the initial
 * vertex can be shared across join paths since there is no prior history. This
 * is true even when we re-sample the vertex at the start of each round. The
 * cutoff sample for each join reflects the history of joins. It can only be
 * shared with join paths having the same history up to that vertex. For
 * example, the following join paths can share estimates of the vertices A, B,
 * and C but not D or E.
 * 
 * <pre>
 * p1: {A, B, C, E, D}
 * p2: {A, B, C, D, E}
 * </pre>
 * 
 * This is because their histories diverge after the (B,C) join.
 * <p>
 * In each successive round of exploration, each join path is replaced by one or
 * more one-step extensions of that path. The extensions are generated by
 * considering the {@link Vertex vertices} in the join graph which are not yet
 * in use within the join path. The join paths which span the same unordered
 * set of vertices in a given round of exploration compete based on their
 * estimated cost. The winner is the join path with the lowest estimated cost.
 * The losers are dropped from further consideration in order to prune the
 * search space. See {@link JGraph} which manages the expansion and competition
 * among join paths.
 * <p>
 * When considering {@link Vertex vertices} which can extend the join path, we
 * first select constrained joins. Only if there are no remaining constrained
 * joins will a join path be extended by an unconstrained join. A constrained
 * join is one which shares a variable with the existing join path. The variable
 * may either be shared directly via the {@link IPredicate}s or indirectly via
 * an {@link IConstraint} which can be evaluated for the {@link Vertex} under
 * consideration given the set of variables which are already known to be bound
 * for the join path. An unconstrained join is one where there are no shared
 * variables and always results in a full cross-product. Unconstrained joins are
 * not chosen unless there are no available constrained joins.
 */
public class Path {

//    private static final transient Logger log = Logger.getLogger(Path.class);

    /**
     * The vertices of the {@link Path}, in path order. The two-vertex
     * constructor stores <code>{v0, v1}</code> here and {@link #addEdge}
     * builds the array for an extended path by appending the new vertex to a
     * copy of this one. Within this class the array is only read once it has
     * been assigned.
     */
    final Vertex[] vertices;

    /**
     * The {@link IPredicate}s of the {@link #vertices}, in the same order
     * (<code>preds[i]</code> is <code>vertices[i].pred</code>). This is
     * computed when the path is created and cached as it is used repeatedly.
     * The private constructor rejects arrays whose length differs from that of
     * {@link #vertices}.
     */
    private final IPredicate<?>[] preds;

	/**
	 * The sample obtained by the step-wise cutoff evaluation of the ordered
	 * edges of the path.
	 * <p>
	 * Note: This sample is generated one edge at a time rather than by
	 * attempting the cutoff evaluation of the entire join path (the latter
	 * approach does allow us to limit the amount of work to be done to satisfy
	 * the cutoff).
	 * <p>
	 * Note: This is updated when we resample the path prior to expanding the
	 * path with another vertex. For that reason the field is not final. Both
	 * constructors reject a <code>null</code> sample, and {@link #addEdge}
	 * throws an {@link IllegalStateException} if the field has since been
	 * cleared, because it uses this sample as the input to the cutoff join of
	 * the new vertex.
	 */
    EdgeSample edgeSample;// TODO rename pathSample?

	/**
	 * Return the sample limit to use the next time this path is resampled.
	 * <p>
	 * If the current {@link #edgeSample} reports a cardinality underflow
	 * ({@link EstimateEnum#Underflow}) then the current limit is doubled.
	 * Otherwise the current limit is increased by the caller's increment.
	 * <p>
	 * The arithmetic is carried out using <code>long</code> values and the
	 * result is capped at {@link Integer#MAX_VALUE}. Without the cap, repeated
	 * doubling of the limit could silently wrap around to a negative limit.
	 * 
	 * @param limitIn
	 *            The default increment for the sample limit.
	 * 
	 * @return The limit to use when resampling this path (never greater than
	 *         {@link Integer#MAX_VALUE}).
	 */
    public int getNewLimit(final int limitIn) {

        // Widen first so that neither the doubling nor the sum can overflow.
        final long current = edgeSample.limit;

        final long proposed = (edgeSample.estimateEnum == EstimateEnum.Underflow) //
                ? current * 2L // underflow: boost the limit.
                : current + limitIn; // otherwise: apply the default increment.

        // Saturate rather than wrap when the new limit exceeds the int range.
        return (int) Math.min(proposed, (long) Integer.MAX_VALUE);

    }
    
    /**
     * The cumulative estimated cardinality of the path. This is zero for an
     * empty path. For a path consisting of a single edge, this is the estimated
     * cardinality of that edge. When creating a new path by adding an edge to
     * an existing path, the cumulative cardinality of the new path is the
     * cumulative cardinality of the existing path plus the estimated
     * cardinality of the cutoff join of the new edge given the input sample of
     * the existing path.
     * <p>
     * Note: The private constructor rejects a negative value for this field.
     * 
     * @todo Track this per vertex as well as the total for more interesting
     *       traces in showPath(Path). In fact, that is just the VertexSample
     *       for the initial vertex and the EdgeSample for each subsequent
     *       vertex in path order. The EdgeSamples are maintained in a map
     *       managed by JGraph during optimization.
     */
    final public long sumEstCard;

	/**
	 * The cumulative estimated #of tuples that would be read for this path if
	 * it were to be fully executed (sum(tuplesRead*f) for each step in the
	 * path).
	 * <p>
	 * For a two-vertex path this is the estimated cardinality of the sample
	 * of the initial vertex plus the estimated tuples read reported by the
	 * edge sample. Each time the path is extended, the estimated tuples read
	 * for the new edge's sample is added to the running total.
	 */
    final public long sumEstRead;

	/**
	 * The expected cost of this join path if it were to be fully executed. This
	 * is a function of {@link #sumEstCard} and {@link #sumEstRead}. The
	 * former reflects the #of intermediate solutions generated. The latter
	 * reflects the #of tuples read from the disk. These two measures are
	 * tracked separately and then combined into the {@link #sumEstCost}.
	 * <p>
	 * Both constructors assign this from the private <code>getCost</code>
	 * helper. As that helper is currently written, the value is identical to
	 * {@link #sumEstCard}.
	 */
    final public long sumEstCost;

    /**
     * The cost function for a join path: folds the cumulative estimated
     * tuples read and the cumulative estimated cardinality into the single
     * score which is used to compare join paths.
     * <p>
     * The current policy considers only the intermediate results, so the
     * returned score is exactly <i>sumEstCard</i> and <i>sumEstRead</i> is
     * ignored. Alternative policies which have been considered are the IO
     * alone (<i>sumEstRead</i>) and the sum of the two measures.
     * 
     * @param sumEstRead
     *            The cumulative estimated #of tuples read (not used by the
     *            current policy).
     * @param sumEstCard
     *            The cumulative estimated cardinality.
     * 
     * @return The cumulative estimated cost of the join path.
     * 
     *         TODO Compute this incrementally as estCost using estRead and
     *         estCard and then take the running sum as sumEstCost and update
     *         the JGraph trace appropriately. [Refactor into an IPathCost
     *         interface. It should have visibility into the full path and also
     *         allow visibility into the vertex cost for generality.]
     * 
     *         TODO Add a cost function API, e.g., IPathCost. This gets passed
     *         into Path to compute a score. We also compute a score for a
     *         vertex. Add query hints for both so we can control the behavior.
     *         The default should be estCard, but estRead or a weighted
     *         combination of estCard and estRead are also possible cost
     *         functions.
     */
    private static long getCost(final long sumEstRead, final long sumEstCard) {

        /*
         * Candidate policies:
         * 
         * sumEstCard + sumEstRead : intermediate results + IO.
         * 
         * sumEstRead : just IO.
         * 
         * sumEstCard : just intermediate results (the policy in effect).
         */
        return sumEstCard;

    }

	/**
	 * Human readable representation of the path: the predicate identifiers of
	 * the vertices in path order, followed by the cumulative estimates and the
	 * current sample.
	 */
	@Override
    public String toString() {

        final StringBuilder buf = new StringBuilder("Path{[");

        // Comma separated predicate identifiers, in path order.
        for (int i = 0; i < vertices.length; i++) {

            if (i > 0) {
                buf.append(',');
            }

            buf.append(vertices[i].pred.getId());

        }

        buf.append("]") //
                .append(",sumEstRead=").append(sumEstRead) //
                .append(",sumEstCard=").append(sumEstCard) //
                .append(",sumEstCost=").append(sumEstCost) //
                .append(",sample=").append(edgeSample) //
                .append("}");

        return buf.toString();

    }

//    /**
//     * Create an empty path.
//     */
//    public Path() {
////        this.edges = Collections.emptyList();
//        this.vertices = new Vertex[0];
//        this.preds = new IPredicate[0];
//        this.cumulativeEstimatedCardinality = 0;
//        this.sample = null;
//    }

    /**
     * Create a path from a single edge.
     * 
     * @param v0
     *            The initial vertex in the path.
     * @param v1
     *            The 2nd vertex in the path.
     * @param edgeSample
     *            The sample obtained from the cutoff join of (v0,v1).
     */
    public Path(final Vertex v0, final Vertex v1, final EdgeSample edgeSample) {

        if (v0 == null)
            throw new IllegalArgumentException();

        if (v1 == null)
            throw new IllegalArgumentException();

        if (v0.sample == null)
            throw new IllegalArgumentException();

        if (edgeSample == null)
            throw new IllegalArgumentException();

        if (edgeSample.getSample() == null)
            throw new IllegalArgumentException();

        this.vertices = new Vertex[]{v0,v1};

        this.preds = getPredicates(vertices);
        
        this.edgeSample = edgeSample;

		/*
		 * The expected #of tuples read for the full join of (v0,v1). This is
		 * everything which could be visited for [v0] plus the #of tuples read
		 * from [v1] during the cutoff join times the (adjusted) join hit ratio.
		 */
		this.sumEstRead = v0.sample.estCard + edgeSample.estRead;

		/*
		 * The estimated cardinality of the cutoff join of (v0,v1).
		 */
		this.sumEstCard = edgeSample.estCard;

		this.sumEstCost = getCost(this.sumEstRead, this.sumEstCard);
		
    }

	/**
	 * Private constructor used when a path is extended by another vertex. The
	 * caller supplies the already assembled arrays and running totals; the
	 * cost is derived here from those totals.
	 * 
	 * @param vertices
	 *            The ordered array of vertices in the new path. The last entry
	 *            in this array is the vertex which was used to extend the path.
	 * @param preds
	 *            The ordered array of predicates in the new path (correlated
	 *            with the vertices and passed in since it is already computed
	 *            by the caller).
	 * @param edgeSample
	 *            The sample from the cutoff join of the last vertex added to
	 *            this path.
	 * @param sumEstCard
	 *            The cumulative estimated cardinality of the new path (must
	 *            not be negative).
	 * @param sumEstRead
	 *            The cumulative estimated tuples read of the new path.
	 * 
	 * @throws IllegalArgumentException
	 *             if an array or the sample is <code>null</code>, if the two
	 *             arrays differ in length, if <i>sumEstCard</i> is negative,
	 *             or if the <i>edgeSample</i> does not expose a sample.
	 */
    private Path(//
            final Vertex[] vertices,//
            final IPredicate<?>[] preds,//
            final EdgeSample edgeSample,//
            final long sumEstCard,//
            final long sumEstRead//
            ) {

        // The two arrays are required and must be correlated 1:1.
        if (vertices == null || preds == null
                || vertices.length != preds.length) {
            throw new IllegalArgumentException();
        }

        // A cumulative cardinality can not be negative.
        if (sumEstCard < 0) {
            throw new IllegalArgumentException();
        }

        // The edge sample is required and must expose its sample.
        if (edgeSample == null || edgeSample.getSample() == null) {
            throw new IllegalArgumentException();
        }

        this.vertices = vertices;
        this.preds = preds;
        this.edgeSample = edgeSample;
        this.sumEstCard = sumEstCard;
        this.sumEstRead = sumEstRead;
        this.sumEstCost = getCost(sumEstRead, sumEstCard);

    }

    /**
     * The length of this join path, expressed as its #of vertices.
     * 
     * @return The #of vertices in this join path.
     */
    public int getVertexCount() {

        final int nvertices = vertices.length;

        return nvertices;

    }

    /**
     * Test whether the given {@link Vertex} is already part of this
     * {@link Path}. Vertices are compared by reference.
     * 
     * @param v
     *            The vertex
     * 
     * @return <code>true</code> iff the vertex is already part of the path.
     * 
     * @throws IllegalArgumentException
     *             if the argument is <code>null</code>.
     */
    public boolean contains(final Vertex v) {

        if (v == null) {
            throw new IllegalArgumentException();
        }

        // Linear scan of the path using reference equality.
        int i = 0;

        while (i < vertices.length && vertices[i] != v) {
            i++;
        }

        // Stopped before the end of the array iff the vertex was found.
        return i < vertices.length;

    }

    /**
     * Return <code>true</code> if this path is an unordered variant of the
     * given path (same vertices in any order).
     * <p>
     * The test has two steps. Paths of different length are rejected
     * immediately. Otherwise every vertex of the given path must also appear
     * (by reference) in this path. That one-directional scan is sufficient
     * only when neither path contains the same vertex twice.
     * {@link #addEdge} refuses to append a vertex which is already present
     * in the path. Note that the two-vertex constructor does not itself verify
     * that its two vertices are distinct.
     * 
     * @param p
     *            Another path.
     * 
     * @return <code>true</code> if this path is an unordered variant of the
     *         given path.
     * 
     * @throws IllegalArgumentException
     *             if the argument is <code>null</code>.
     */
    public boolean isUnorderedVariant(final Path p) {

        if (p == null)
            throw new IllegalArgumentException();

        final Vertex[] mine = this.vertices;
        final Vertex[] theirs = p.vertices;

        if (mine.length != theirs.length) {

            // Fast rejection: the paths span a different #of vertices.
            return false;

        }

        /*
         * Scan the vertices of the caller's path. If any of those vertices
         * are NOT found in this path the paths are not unordered variations
         * of one another.
         */
        nextVertex: for (final Vertex candidate : theirs) {

            for (final Vertex v : mine) {

                if (v == candidate) {

                    // Found. Move on to the caller's next vertex.
                    continue nextVertex;

                }

            }

            // The candidate does not appear anywhere in this path.
            return false;

        }

        return true;

    }

    /**
     * Return a read-only {@link List} view of the vertices in this path, in
     * path order: the initial vertex comes first, followed by each vertex in
     * the order in which it was appended to the path.
     * <p>
     * NOTE(review): The earlier documentation for this method stated that the
     * minimum cardinality vertex of the first edge is always reported first
     * (which is critical for producing the correct join plan) and that, for
     * each remaining edge, the previously unvisited vertex is reported. That
     * ordering is decided by whoever constructs the path. It is not enforced
     * by this class.
     * 
     * @return The vertices (in path order).
     */
    public List<Vertex> getVertices() {

        // Fixed-size view of the backing array (not a copy).
        final List<Vertex> view = Arrays.asList(vertices);

        // Wrapped so that callers can not write through to the array.
        return Collections.unmodifiableList(view);

    }

    /**
     * Return the {@link IPredicate}s associated with the vertices of the
     * join path in path order.
     * <p>
     * A copy of the cached array is returned. The cached array is reused by
     * this class each time the path is extended, when the predicate
     * identifiers are reported and when a path segment is extracted, so it
     * must not be exposed to modification by the caller. The predicates
     * themselves are not copied.
     * 
     * @return A new array containing the predicates in path order.
     * 
     * @see #getVertices()
     */
    public IPredicate<?>[] getPredicates() {

        // Defensive copy: do not hand out the internal array.
        return preds.clone();

    }

    /**
     * Return the {@link BOp} identifiers of the predicates associated with
     * each vertex in path order. The identifiers are obtained by handing the
     * cached predicate array to {@link BOpUtility#getPredIds}.
     * 
     * @return The predicate identifiers.
     */
    public int[] getVertexIds() {

        final int[] ids = BOpUtility.getPredIds(preds);

        return ids;

    }
    
    /**
     * Extract the predicate of each vertex, preserving the order of the
     * vertices.
     * 
     * @param vertices
     *            The vertices in the selected evaluation order.
     * 
     * @return A new array in which the i-th entry is the predicate of the
     *         i-th vertex.
     */
    static private IPredicate<?>[] getPredicates(final Vertex[] vertices) {

        final IPredicate<?>[] out = new IPredicate[vertices.length];

        // Index of the next slot to fill in the output array.
        int next = 0;

        for (final Vertex v : vertices) {

            out[next++] = v.pred;

        }

        return out;

    }

    /**
     * Test whether the given path is a prefix of this path. The vertices are
     * compared position by position using reference equality.
     * 
     * @param p
     *            The given path.
     * 
     * @return <code>true</code> if this path begins with the given path.
     * 
     * @throws IllegalArgumentException
     *             if the argument is <code>null</code>.
     * 
     * @todo unit tests.
     */
    public boolean beginsWith(final Path p) {

        if (p == null) {
            throw new IllegalArgumentException();
        }

        final Vertex[] prefix = p.vertices;

        if (prefix.length > vertices.length) {

            // A longer path can not be a prefix of this path.
            return false;

        }

        // Advance while the two paths agree.
        int i = 0;

        while (i < prefix.length && vertices[i] == prefix[i]) {
            i++;
        }

        // A prefix iff every position of the caller's path was matched.
        return i == prefix.length;

    }

    /**
     * Test whether this path begins with the given sequence of predicate
     * identifiers. The identifier of the predicate of the i-th vertex in this
     * path is compared with the i-th entry of the array.
     * 
     * @param ids
     *            The predicate identifiers, in path order.
     * 
     * @return <code>true</code> if the leading vertices of this path have
     *         exactly those predicate identifiers.
     * 
     * @throws IllegalArgumentException
     *             if the argument is <code>null</code>.
     * 
     * @todo unit tests.
     */
    public boolean beginsWith(final int[] ids) {

        if (ids == null) {
            throw new IllegalArgumentException();
        }

        if (ids.length > vertices.length) {

            // More identifiers than vertices: can not be a prefix.
            return false;

        }

        // Advance while the identifiers agree.
        int i = 0;

        while (i < ids.length && vertices[i].pred.getId() == ids[i]) {
            i++;
        }

        // A prefix iff every identifier was matched.
        return i == ids.length;

    }

    /**
     * Return the first N {@link IPredicate}s in this {@link Path}.
     * 
     * @param length
     *            The length of the path segment. This must be non-negative and
     *            no greater than the #of predicates in the path.
     * 
     * @return The path segment (a new array; the predicates themselves are
     *         not copied).
     * 
     * @throws IllegalArgumentException
     *             if <i>length</i> is negative or exceeds the #of predicates
     *             in this path.
     */
    public IPredicate<?>[] getPathSegment(final int length) {

        // Reject both ends of the range. A negative length would otherwise
        // surface as a NegativeArraySizeException from the array allocation.
        if (length < 0 || length > preds.length)
            throw new IllegalArgumentException("length=" + length
                    + ", but the path has " + preds.length + " predicates");

        // Copies the leading [length] entries into a new array.
        return Arrays.copyOf(preds, length);

    }

    /**
     * Extend this path by one vertex and return the resulting path. This path
     * is not modified. The cutoff join for the new edge is evaluated using the
     * {@link #edgeSample} of <i>this</i> join path as its input and the actual
     * access path for the target vertex. The cumulative estimates of the new
     * path are the estimates of this path plus those reported by the sample
     * for the new edge.
     * 
     * @param queryEngine
     *            The query engine.
     * @param joinGraph
     *            The pipeline operator that is executing the RTO (passed
     *            through to the cutoff join).
     * @param limit
     *            The limit for the cutoff join.
     * @param vnew
     *            The new vertex.
     * @param constraints
     *            The join graph constraints (if any).
     * @param pathIsComplete
     *            <code>true</code> iff all vertices in the join graph are
     *            incorporated into this path.
     * 
     * @return The new path. The materialized sample for the new path is the
     *         sample obtained by the cutoff join for the edge added to the
     *         path.
     * 
     * @throws IllegalArgumentException
     *             if <i>vnew</i> is <code>null</code> or is already present in
     *             this path.
     * @throws IllegalStateException
     *             if this path does not have a sample.
     * @throws Exception
     */
    public Path addEdge(final QueryEngine queryEngine,
            final JoinGraph joinGraph, final int limit, final Vertex vnew,
            final IConstraint[] constraints, final boolean pathIsComplete)
            throws Exception {

        if (vnew == null) {
            throw new IllegalArgumentException();
        }

        if (contains(vnew)) {
            throw new IllegalArgumentException(
                    "Vertex already present in path: vnew=" + vnew + ", path="
                            + this);
        }

        if (this.edgeSample == null) {
            throw new IllegalStateException();
        }

        // The #of vertices (and hence predicates) in the path being extended.
        final int n = preds.length;

        // The predicates of this path followed by the target's predicate.
        final IPredicate<?>[] extendedPreds = Arrays.copyOf(preds, n + 1);
        extendedPreds[n] = vnew.pred;

        // The vertices of this path followed by the target vertex.
        final Vertex[] extendedVertices = Arrays.copyOf(vertices, n + 1);
        extendedVertices[n] = vnew;

        /*
         * Chain sample the edge.
         * 
         * Note: ROX uses the intermediate result I(p) for the existing path
         * as the input when sampling the edge. The corresponding concept for
         * us is the sample for this Path, which will have all variable
         * bindings produced so far. In order to estimate the cardinality of
         * the new join path we have to do a one step cutoff evaluation of the
         * new Edge, given the sample available on the current Path.
         * 
         * Note: It is possible for the resulting edge sample to be empty (no
         * solutions). Unless the sample also happens to be exact, this is an
         * indication that the estimated cardinality has underflowed. We track
         * the estimated cumulative cardinality, so this does not make the
         * join path an immediate winner, but it does mean that we can not
         * probe further on that join path as we lack any intermediate
         * solutions to feed into the downstream joins. To resolve that, we
         * have to increase the sample limit (unless the path is the winner,
         * in which case we can fully execute the join path segment and
         * materialize the results and use those to probe further).
         */
        final EdgeSample newSample = AST2BOpRTO.cutoffJoin(//
                queryEngine,//
                joinGraph,//
                limit,//
                extendedPreds,//
                constraints,//
                pathIsComplete,//
                this.edgeSample // the source sample.
                );

        // Fold the estimates for the new edge into the running totals.
        final long newSumEstCard = this.sumEstCard + newSample.estCard;
        final long newSumEstRead = this.sumEstRead + newSample.estRead;

        // The extended path.
        return new Path(extendedVertices, extendedPreds, newSample,
                newSumEstCard, newSumEstRead);

    }

    /**
     * Cutoff join of the last vertex in the join path. This is a convenience
     * entry point: the request is handed unchanged to
     * {@link AST2BOpRTO#cutoffJoin}, which performs the evaluation.
     * <p>
     * <strong>The caller is responsible for protecting against needless
     * re-sampling.</strong> This includes cases where a sample already exists
     * at the desired sample limit and cases where the sample is already exact.
     * 
     * @param queryEngine
     *            The query engine.
     * @param joinGraph
     *            The pipeline operator that is executing the RTO. This defines
     *            the join graph (vertices, edges, and constraints) and also
     *            provides access to the AST and related metadata required to
     *            execute the join graph.
     * @param limit
     *            The limit for the cutoff join.
     * @param path
     *            The path segment, which must include the target vertex as the
     *            last component of the path segment.
     * @param constraints
     *            The constraints declared for the join graph (if any). The
     *            appropriate constraints will be applied based on the variables
     *            which are known to be bound as of the cutoff join for the last
     *            vertex in the path segment.
     * @param pathIsComplete
     *            <code>true</code> iff all vertices in the join graph are
     *            incorporated into this path.
     * @param sourceSample
     *            The input sample for the cutoff join. When this is a one-step
     *            estimation of the cardinality of the edge, then this sample is
     *            taken from the {@link VertexSample}. When the edge (vSource,
     *            vTarget) extends some {@link Path}, then this is taken from
     *            the {@link EdgeSample} for that {@link Path}.
     * 
     * @return The result of sampling that edge.
     * 
     * @throws Exception
     */
    static public EdgeSample cutoffJoin(//
            final QueryEngine queryEngine,//
            final JoinGraph joinGraph,//
            final int limit,//
            final IPredicate<?>[] path,//
            final IConstraint[] constraints,//
            final boolean pathIsComplete,//
            final SampleBase sourceSample//
    ) throws Exception {

        // Delegate to the AST/RTO integration class.
        final EdgeSample sample = AST2BOpRTO.cutoffJoin(//
                queryEngine,//
                joinGraph,//
                limit,//
                path,//
                constraints,//
                pathIsComplete,//
                sourceSample//
                );

        return sample;

    }

}