/** Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package com.bigdata.rdf.sparql.ast.optimizers; import java.util.LinkedList; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.log4j.Logger; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.IVariable; import com.bigdata.bop.engine.StaticAnalysisStats; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.constraints.RangeBOp; import com.bigdata.rdf.sparql.ast.JoinGroupNode; import com.bigdata.rdf.sparql.ast.RangeNode; import com.bigdata.rdf.sparql.ast.StatementPatternNode; import com.bigdata.rdf.sparql.ast.StaticAnalysis; import com.bigdata.rdf.sparql.ast.TermNode; import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; import com.bigdata.rdf.sparql.ast.optimizers.ASTStaticJoinOptimizer.Annotations; import com.bigdata.rdf.spo.SPORelation; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.util.concurrent.ExecutionExceptions; /** * Attach range counts to all statement patterns in the query. * * @author mikepersonick * */ public class ASTRangeCountOptimizer extends AbstractJoinGroupOptimizer implements IASTOptimizer { private static final transient Logger log = Logger .getLogger(ASTRangeCountOptimizer.class); /** * Optimize the join group. */ @Override protected void optimizeJoinGroup(final AST2BOpContext ctx, final StaticAnalysis sa, final IBindingSet[] bSets, final JoinGroupNode group) { final List<StatementPatternNode> spNodes = group.getStatementPatterns(); if (!spNodes.isEmpty()) { // Always attach the range counts. final int nrExogeneousBindings = bSets==null ? 0 : bSets.length; attachRangeCounts(ctx, spNodes, getExogenousBindings(bSets), nrExogeneousBindings); } } /** * Use the {@link SPORelation} from the database to grab the appropriate * range counts for the {@link StatementPatternNode}s. Only tries to attach * them if the annotation {@link Annotations#ESTIMATED_CARDINALITY} is not * already attached to the node. This makes it possible to write unit tests * without real data. */ protected void attachRangeCounts(final AST2BOpContext ctx, final List<StatementPatternNode> spNodes, final IBindingSet exogenousBindings, final int nrExogeneousBindings) { final AbstractTripleStore db = ctx.getAbstractTripleStore(); // Setup tasks to obtain estimated range counts. final List<Callable<Void>> tasks = new LinkedList<Callable<Void>>(); for (StatementPatternNode sp : spNodes) { if (sp.getProperty(Annotations.ESTIMATED_CARDINALITY) == null) { tasks.add(new RangeCountTask(sp, ctx, exogenousBindings, nrExogeneousBindings)); } } // Obtain range counts in parallel. final List<Future<Void>> futures; try { futures = db.getExecutorService().invokeAll(tasks); } catch (InterruptedException e) { // propagate interrupt. Thread.currentThread().interrupt(); return; } // Check futures for errors. final List<Throwable> causes = new LinkedList<Throwable>(); for (Future<Void> f : futures) { try { f.get(); } catch (InterruptedException e) { log.error(e); causes.add(e); } catch (ExecutionException e) { log.error(e); causes.add(e); } } /* * If there were any errors, then throw an exception listing them. */ if (!causes.isEmpty()) { // Throw exception back to the leader. if (causes.size() == 1) throw new RuntimeException(causes.get(0)); throw new RuntimeException("nerrors=" + causes.size(), new ExecutionExceptions(causes)); } } /** * Task unconditionally obtains the range count for the * {@link StatementPatternNode}. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan * Thompson</a> */ private class RangeCountTask implements Callable<Void> { private final StatementPatternNode sp; private final AST2BOpContext ctx; private final IBindingSet exogenousBindings; private final int nrExogeneousBindings; public RangeCountTask(final StatementPatternNode sp, final AST2BOpContext ctx, final IBindingSet exogenousBindings, final int nrExogeneousBindings) { this.sp = sp; this.ctx = ctx; this.exogenousBindings = exogenousBindings; this.nrExogeneousBindings = nrExogeneousBindings; } @Override public Void call() throws Exception { estimateCardinality(sp, ctx, exogenousBindings, nrExogeneousBindings); return null; } } /** * For testing purposes we can override this method. * @param sp * @param db * @param exogenousBindings */ protected void estimateCardinality(StatementPatternNode sp, final AST2BOpContext ctx, final IBindingSet exogenousBindings, final int nrExogeneousBindings) { final AtomicBoolean usesExogeneousBindings = new AtomicBoolean(false); // unless proven otherwise final IV<?, ?> s = getIV(sp.s(), exogenousBindings, usesExogeneousBindings); final IV<?, ?> p = getIV(sp.p(), exogenousBindings, usesExogeneousBindings); final IV<?, ?> o = getIV(sp.o(), exogenousBindings, usesExogeneousBindings); final IV<?, ?> c = getIV(sp.c(), exogenousBindings, usesExogeneousBindings); final int exogenousBindingsAdjustmentFactor = usesExogeneousBindings.get() ? Math.max(1, nrExogeneousBindings) : 1; estimateCardinalities(sp, s, p, o, c, ctx, exogenousBindingsAdjustmentFactor); } protected void estimateCardinalities(StatementPatternNode sp, final IV<?, ?> s, final IV<?, ?> p, final IV<?, ?> o, final IV<?, ?> c, final AST2BOpContext ctx, final int exogenousBindingsAdjustmentFactor) { final AbstractTripleStore db = ctx.getAbstractTripleStore(); final RangeNode rangeNode = sp.getRange(); final RangeBOp range = rangeNode != null ? rangeNode.getRangeBOp() : null; final IAccessPath<?> ap = db.getAccessPath(s, p, o, c, range); final StaticAnalysisStats saStats = ctx.getStaticAnalysisStats(); long start = System.nanoTime(); final long cardinality = ap.rangeCount(false/* exact */); saStats.registerRangeCountCall(System.nanoTime() - start); // Annotate with the fast range count. sp.setProperty(Annotations.ESTIMATED_CARDINALITY, cardinality*exogenousBindingsAdjustmentFactor); /* * Annotate with the index which would be used if we did not run * access path "as-bound". This is the index that will be used * if we wind up doing a hash join for this predicate. * * TODO It would make sense to lift this annotation into a * different AST optimizer so it is always present. An * optimization for index locality for as-bound evaluation * depends on the presence of this annotation. * * @see https://sourceforge.net/apps/trac/bigdata/ticket/150" * (Choosing the index for testing fully bound access paths * based on index locality) */ sp.setProperty(Annotations.ORIGINAL_INDEX, ap.getKeyOrder()); } /** * Helper method grabs the IV out of the TermNode, doing the appropriate * NULL and constant/var checks. * * @param term * @param exogenousBindings * The externally given bindings (optional). */ @SuppressWarnings("rawtypes") static protected IV getIV(final TermNode term, final IBindingSet exogenousBindings, final AtomicBoolean usesExogeneousBinding) { if (term != null && term.isVariable() && exogenousBindings != null) { @SuppressWarnings("unchecked") final IConstant<IV> c = (IConstant<IV>) exogenousBindings .get((IVariable) term.getValueExpression()); if(c != null) { usesExogeneousBinding.set(true); return c.get(); } } if (term != null && term.isConstant()) { final IV iv = ((IConstant<IV>) term.getValueExpression()).get(); if (iv == null) { throw new AssertionError("this optimizer cannot run with unknown IVs in statement patterns"); } return iv; } else { return null; } } /** * Return the exogenous bindings. * <p> * Note: This is considering only a single exogenous solution. It can not * really use more than one solution to estimate the range counts unless it * does the sum across all exogenous solutions and then somehow combines * that information in order to make a decision on a single query plan which * is "best" overall for those solutions. * <p> * This takes the simplifying assumption that each solution will have the * same pattern of bindings. This is not true of necessity, but it will be * true (for example) if the BINDINGS are from the openrdf API (just one * exogenous solution) or if the BINDINGS are being sent with a SERVICE call * and were generated by some pattern of non-optional JOINs. * <p> * This can get things wrong if there are variables which are only bound in * some of the solutions. The RTO is insensitive to that because it will * feed all source solutions into the first cutoff joins and thus capture * the estimated costs for the data, the query, and the source bindings. * * @param bindingSets * The given solutions (optional). * * @see https://sourceforge.net/apps/trac/bigdata/ticket/412 * (StaticAnalysis#getDefinitelyBound() ignores exogenous variables.) */ static private IBindingSet getExogenousBindings( final IBindingSet[] bindingSets) { if (bindingSets == null || bindingSets.length == 0) return null; return bindingSets[0]; } }