/* * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007. * Copyright James Leigh (c) 2006. * * Licensed under the Aduna BSD-style license. */ package org.openrdf.query.algebra.evaluation.impl; import java.util.ArrayList; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Set; import org.openrdf.query.BindingSet; import org.openrdf.query.Dataset; import org.openrdf.query.algebra.Join; import org.openrdf.query.algebra.TupleExpr; import org.openrdf.query.algebra.evaluation.QueryOptimizer; import org.openrdf.query.algebra.helpers.QueryModelVisitorBase; /** * A query optimizer that re-orders nested Joins. * * @author Arjohn Kampman * @author James Leigh */ public class QueryJoinOptimizer implements QueryOptimizer { protected final EvaluationStatistics statistics; public QueryJoinOptimizer() { this(new EvaluationStatistics()); } public QueryJoinOptimizer(EvaluationStatistics statistics) { this.statistics = statistics; } /** * Applies generally applicable optimizations: path expressions are sorted * from more to less specific. * * @param tupleExpr */ public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) { tupleExpr.visit(new JoinVisitor()); } protected class JoinVisitor extends QueryModelVisitorBase<RuntimeException> { @Override public void meet(Join node) { List<TupleExpr> joinArgs = new LinkedList<TupleExpr>(); getJoinArgs(node, joinArgs); // Process rest of query model before reordering the joins for (TupleExpr joinArg : joinArgs) { joinArg.visit(this); } joinArgs = sortExpressions(joinArgs, new HashSet<String>()); // Build new join hierarchy TupleExpr replacement = joinArgs.get(0); for (int i = 1; i < joinArgs.size(); i++) { replacement = new Join(replacement, joinArgs.get(i)); } // Replace old join hierarchy node.replaceWith(replacement); } protected void getJoinArgs(TupleExpr tupleExpr, List<TupleExpr> joinArgs) { if (tupleExpr instanceof Join) { Join join = (Join)tupleExpr; getJoinArgs(join.getLeftArg(), joinArgs); getJoinArgs(join.getRightArg(), joinArgs); } else { joinArgs.add(tupleExpr); } } /** * Merges the boolean constraints and the path expressions in one single * list. Path expressions are heuristically reordered to minimize query * evaluation time and boolean constraints are inserted between them. The * separate boolean constraints are moved to the start of the list as much * as possible, under the condition that all variables that are used in * the constraint are instantiated by the path expressions that are * earlier in the list. An example combined list might be: * <tt>[(A,B,C), A != foo:bar, (B,E,F), C != F, (F,G,H)]</tt>. */ protected List<TupleExpr> sortExpressions(List<TupleExpr> expressions, Set<String> boundVars) { List<TupleExpr> orderedExpressions = new ArrayList<TupleExpr>(expressions.size()); while (!expressions.isEmpty()) { TupleExpr tupleExpr = selectNextTupleExpr(expressions, boundVars); expressions.remove(tupleExpr); orderedExpressions.add(tupleExpr); boundVars.addAll(tupleExpr.getBindingNames()); } return orderedExpressions; } /** * Selects from a list of tuple expressions the next tuple expression that * should be evaluated. This method selects the tuple expression with * highest number of bound variables, preferring variables that have been * bound in other tuple expressions over variables with a fixed value. */ protected TupleExpr selectNextTupleExpr(List<TupleExpr> expressions, Set<String> boundVars) { double lowestCardinality = Double.MAX_VALUE; TupleExpr result = null; for (TupleExpr tupleExpr : expressions) { // Calculate a score for this tuple expression double cardinality = statistics.getCardinality(tupleExpr, boundVars); if (cardinality < lowestCardinality) { // More specific path expression found lowestCardinality = cardinality; result = tupleExpr; } } return result; } } }