/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Oct 31, 2011
 */

package com.bigdata.rdf.sparql.ast.eval;

import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.log4j.Logger;
import org.openrdf.query.algebra.StatementPattern.Scope;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpEvaluationContext;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IConstraint;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.NamedSolutionSetRefUtility;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.ap.Predicate;
import com.bigdata.bop.ap.filter.BOpFilterBase;
import com.bigdata.bop.ap.filter.DistinctFilter;
import com.bigdata.bop.cost.ScanCostReport;
import com.bigdata.bop.cost.SubqueryCostReport;
import com.bigdata.bop.join.AccessPathJoinAnnotations;
import com.bigdata.bop.join.DistinctTermScanOp;
import com.bigdata.bop.join.FastRangeCountOp;
import com.bigdata.bop.join.HTreeHashJoinAnnotations;
import com.bigdata.bop.join.HTreeHashJoinOp;
import com.bigdata.bop.join.HashJoinAnnotations;
import com.bigdata.bop.join.HashJoinOp;
import com.bigdata.bop.join.JVMHashJoinOp;
import com.bigdata.bop.join.JoinAnnotations;
import com.bigdata.bop.join.PipelineJoin;
import com.bigdata.bop.rdf.filter.NativeDistinctFilter;
import com.bigdata.bop.rdf.filter.StripContextFilter;
import com.bigdata.bop.rdf.join.DataSetJoin;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.VTE;
import com.bigdata.rdf.internal.impl.TermId;
import com.bigdata.rdf.sparql.ast.DatasetNode;
import com.bigdata.rdf.sparql.ast.QueryHints;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.InGraphHashSetFilter;
import com.bigdata.rdf.spo.SPOKeyOrder;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.accesspath.AccessPath;
import com.bigdata.relation.accesspath.ElementFilter;
import com.bigdata.relation.accesspath.IElementFilter;
import com.bigdata.relation.rule.EmptyAccessPathExpander;

/**
 * Class handles join patterns.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 */
public class AST2BOpJoins extends AST2BOpFilters {

    private static final Logger log = Logger.getLogger(AST2BOpJoins.class);

    /**
     * 
     */
    protected AST2BOpJoins() {
    }

    /**
     * Add a join for a statement pattern. This handles triples-mode,
     * named-graph and default graph join patterns whether on a single machine
     * or on a cluster.
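     * <p>
     * A minimal caller sketch (hypothetical variable names; in practice these
     * values are supplied by the surrounding query plan generation logic):
     * 
     * <pre>{@code
     * // Normal (non-RTO) evaluation: pass a null cutoffLimit.
     * left = AST2BOpJoins.join(
     *         left,      // upstream pipeline operator
     *         pred,      // predicate for the statement pattern
     *         doneSet,   // variables already known to be materialized
     *         null,      // no constraints on this join
     *         null,      // cutoffLimit: non-null only for RTO cutoff joins
     *         queryHints, ctx);
     * }</pre>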
     * 
     * @param left
     *            The left (upstream) operator in the pipeline.
     * @param pred
     *            The predicate describing the statement pattern.
     * @param doneSet
     *            The set of variables already known to be materialized.
     * @param constraints
     *            Constraints on that join (optional).
     * @param cutoffLimit
     *            When non-null, this is the limit for a cutoff join (RTO).
     * @param queryHints
     *            Query hints associated with that {@link StatementPatternNode}.
     * @param ctx
     *            The evaluation context.
     */
    @SuppressWarnings("rawtypes")
    public static PipelineOp join(//
            PipelineOp left,//
            Predicate pred,//
            final Set<IVariable<?>> doneSet,//
            final Collection<IConstraint> constraints,//
            final Long cutoffLimit, //
            final Properties queryHints,//
            final AST2BOpContext ctx//
            ) {

        final int joinId = ctx.nextId();

        // annotations for this join.
        final List<NV> anns = new LinkedList<NV>();

        anns.add(new NV(BOp.Annotations.BOP_ID, joinId));

        /*
         * A map containing the materialization requirements for each constraint
         * attached to this join. This is populated as a side-effect by
         * getJoinConstraints() (immediately below). If this map is NOT empty
         * then the JOIN operator will be followed by either mandatory or
         * conditional materialization of variable bindings in order to evaluate
         * one or more constraints.
         */
        final Map<IConstraint, Set<IVariable<IV>>> needsMaterialization =
                new LinkedHashMap<IConstraint, Set<IVariable<IV>>>();

        /*
         * Add constraints to the join for that predicate.
         * 
         * Note: If we are performing cutoff evaluation of a JOIN
         * [cutoffLimit!=null], then this disables the conditional routing logic
         * for constraints with SOMETIMES materialization requirements. This is
         * necessary in order to preserve the order of evaluation. Conditional
         * routing of solutions causes them to be reordered and that breaks the
         * ability to accurately estimate the cardinality of the JOIN using
         * cutoff evaluation.
         */
        anns.add(new NV(JoinAnnotations.CONSTRAINTS, getJoinConstraints2(
                constraints, needsMaterialization,
                cutoffLimit == null/* conditionalRouting */)));

        // true iff there are no constraints that require materialization.
        anns.add(new NV(Annotations.SIMPLE_JOIN, needsMaterialization.isEmpty()));

        /*
         * Pull off annotations before we clear them from the predicate.
         */
        final Scope scope = (Scope) pred.getProperty(Annotations.SCOPE);

        // true iff this is a quads access path.
        final boolean quads = pred.getProperty(Annotations.QUADS,
                Annotations.DEFAULT_QUADS);

        // when non-null use distinct-term-scan. see #1035.
        final VarNode distinctTermScanVar = (VarNode) pred
                .getProperty(StatementPatternNode.Annotations.DISTINCT_TERM_SCAN_VAR);

        // when non-null use fast-range-count. see #1037.
        final VarNode fastRangeCountVar = (VarNode) pred
                .getProperty(StatementPatternNode.Annotations.FAST_RANGE_COUNT_VAR);

        // pull off the Sesame dataset before we strip the annotations.
        final DatasetNode dataset = (DatasetNode) pred
                .getProperty(Annotations.DATASET);

        // strip off annotations that we do not want to propagate.
        pred = pred.clearAnnotations(new String[] { Annotations.SCOPE,
                Annotations.QUADS, Annotations.DATASET,
                StatementPatternNode.Annotations.DISTINCT_TERM_SCAN_VAR,
                StatementPatternNode.Annotations.FAST_RANGE_COUNT_VAR });

        if (fastRangeCountVar != null) {

            // fast-range-count. see #1037.
            left = fastRangeCountJoin(left, anns, pred, dataset, cutoffLimit,
                    fastRangeCountVar, queryHints, ctx);

            return left;

        }

        if (distinctTermScanVar != null) {

            // distinct-term-scan. see #1035.
            left = distinctTermScanJoin(left, anns, pred, dataset, cutoffLimit,
                    distinctTermScanVar, queryHints, ctx);

            return left;

        }
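        /*
         * Dispatch on the access path type (see the branches below): in quads
         * mode the statement pattern's scope selects the named-graph or
         * default-graph code path, while triples (and provenance) mode uses a
         * plain pipeline join.
         */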
        if (quads) {

            /*
             * Quads mode.
             */

            // TODO Verifying that the expanders are not present. Take this
            // assert out once we have proven that the expanders are not
            // present.
            assert pred.getProperty(IPredicate.Annotations.ACCESS_PATH_EXPANDER) == null;

            switch (scope) {
            case NAMED_CONTEXTS:
                left = namedGraphJoin(left, anns, pred, dataset, cutoffLimit,
                        queryHints, ctx);
                break;
            case DEFAULT_CONTEXTS:
                left = defaultGraphJoin(left, anns, pred, dataset, cutoffLimit,
                        queryHints, ctx);
                break;
            default:
                throw new AssertionError();
            }

        } else {

            /*
             * Triples or provenance mode.
             */

            left = triplesModeJoin(left, anns, pred, cutoffLimit, queryHints,
                    ctx);

        }

        if (needsMaterialization.isEmpty()) {

            // No filters.
            return left;

        }

        /*
         * Add operators to materialize variables (as necessary) and evaluate
         * filters.
         */

        if (cutoffLimit != null) {

            left = addNonConditionalMaterializationSteps(left, doneSet,
                    needsMaterialization, cutoffLimit, queryHints, ctx);

        } else {

            /*
             * For each filter which requires materialization steps, add the
             * materialization steps to the pipeline and then add the filter to
             * the pipeline.
             * 
             * Note: This is the old code path. This code path does not support
             * cutoff evaluation of joins because it can reorder the solutions.
             */

            left = addMaterializationSteps3(left, doneSet,
                    needsMaterialization, queryHints, ctx);

        }

        return left;

    }

    /**
     * FIXME We need to handle cutoff joins here or the distinct-term-scan will
     * not work with the RTO (alternatively, make sure the RTO is only using
     * pipeline joins when sampling the join graph).
     * 
     * @see <a href="http://trac.blazegraph.com/ticket/1035" > DISTINCT
     *      PREDICATEs query is slow </a>
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private static PipelineOp distinctTermScanJoin(//
            final PipelineOp left,//
            final List<NV> anns, //
            Predicate pred,//
            final DatasetNode dataset, //
            final Long cutoffLimitIsIgnored,//
            final VarNode distinctTermScanVar, //
            final Properties queryHints, //
            final AST2BOpContext ctx//
            ) {

        final IVariable distinctVar = distinctTermScanVar.getValueExpression();

        anns.add(new NV(DistinctTermScanOp.Annotations.DISTINCT_VAR,
                distinctVar));

        // A mock constant used for predicate in which the distinctVar is not
        // yet bound.
        final Constant<IV> mockConst = new Constant<IV>(TermId.mockIV(VTE.URI));

        // ensure distinctVar is bound in mockPred.
        final IPredicate mockPred = pred.asBound(distinctVar, mockConst);

        final SPOKeyOrder keyOrder = SPOKeyOrder.getKeyOrder(mockPred,
                ctx.isQuads() ? 4 : 3);

        // Override the key order.
        pred = (Predicate) pred.setProperty(IPredicate.Annotations.KEY_ORDER,
                keyOrder);

        anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

        return applyQueryHints(
                new DistinctTermScanOp(leftOrEmpty(left), NV.asMap(anns
                        .toArray(new NV[anns.size()]))), queryHints, ctx);

    }
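    /*
     * Illustrative example (hypothetical query, not from this code): a query
     * such as
     * 
     *   SELECT DISTINCT ?p WHERE { ?s ?p ?o }
     * 
     * can be annotated with DISTINCT_TERM_SCAN_VAR=?p and answered by the
     * distinct-term-scan above, which skips through a suitable index (e.g.,
     * POS) visiting one tuple per distinct ?p rather than scanning every
     * statement. See ticket #1035.
     */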
    /**
     * Use the {@link FastRangeCountOp} rather than a key-range scan.
     * 
     * @see <a href="http://trac.blazegraph.com/ticket/1037" > Rewrite SELECT
     *      COUNT(...) (DISTINCT|REDUCED) {single-triple-pattern} as ESTCARD
     *      </a>
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private static PipelineOp fastRangeCountJoin(//
            final PipelineOp left,//
            final List<NV> anns, //
            final Predicate pred,//
            final DatasetNode dataset, //
            final Long cutoffLimitIsIgnored,//
            final VarNode fastRangeCountVar, //
            final Properties queryHints, //
            final AST2BOpContext ctx//
            ) {

        if (ctx.gpuEvaluation != null
                && pred.getProperty(GpuAnnotations.EVALUATE_ON_GPU,
                        GpuAnnotations.DEFAULT_EVALUATE_ON_GPU)) {

            return ctx.gpuEvaluation.fastRangeCountJoin(left, anns, pred,
                    dataset, cutoffLimitIsIgnored, fastRangeCountVar,
                    queryHints, ctx);

        }

        anns.add(new NV(FastRangeCountOp.Annotations.COUNT_VAR,
                fastRangeCountVar.getValueExpression()));

        anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

        return applyQueryHints(
                new FastRangeCountOp(leftOrEmpty(left), NV.asMap(anns
                        .toArray(new NV[anns.size()]))), queryHints, ctx);

    }

    /**
     * Generate a {@link PipelineJoin} for a triples mode access path.
     * 
     * @param ctx
     * @param left
     * @param anns
     * @param pred
     * @param queryHints
     * 
     * @return The join operator.
     */
    private static PipelineOp triplesModeJoin(//
            final PipelineOp left, //
            final List<NV> anns, //
            Predicate<?> pred,//
            final Long cutoffLimit,//
            final Properties queryHints,//
            final AST2BOpContext ctx) {

        final boolean scaleOut = ctx.isCluster();

        if (scaleOut && !ctx.remoteAPs) {
            /*
             * All triples queries can run shard-wise in scale-out.
             */
            anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
                    BOpEvaluationContext.SHARDED));
            pred = (Predicate<?>) pred.setProperty(
                    Predicate.Annotations.REMOTE_ACCESS_PATH, false);
        } else {
            anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
                    BOpEvaluationContext.ANY));
        }

        anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

        return newJoin(left, anns, false/* defaultGraphFilter */,
                null/* summary */, cutoffLimit, queryHints, ctx);

    }

    /**
     * Generate a named graph join (quads mode).
     * 
     * @param ctx
     * @param left
     * @param anns
     * @param pred
     * @param dataset
     * @return
     * 
     * @todo If the context position is shared by some other variable which we
     *       know to be bound based on the selected join order, then we need to
     *       treat the context variable as bound during this analysis.
     * 
     * @todo Since we do not know the specific asBound values, but only that
     *       they will be bound, we should defer the SCAN versus SUBQUERY
     *       decision until we actually evaluate that access path. This is
     *       basically a special case of runtime query optimization.
     */
    private static PipelineOp namedGraphJoin(//
            PipelineOp left, //
            final List<NV> anns, //
            Predicate<?> pred,//
            final DatasetNode dataset, //
            final Long cutoffLimit,//
            final Properties queryHints,//
            final AST2BOpContext ctx) {

        final boolean scaleOut = ctx.isCluster();

        if (scaleOut && !ctx.remoteAPs) {
            /*
             * All named graph patterns in scale-out are partitioned (sharded).
             */
            anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
                    BOpEvaluationContext.SHARDED));
            pred = (Predicate<?>) pred.setProperty(
                    Predicate.Annotations.REMOTE_ACCESS_PATH, false);
        } else {
            anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
                    BOpEvaluationContext.ANY));
        }

        if (dataset == null || dataset.getNamedGraphs() == null) {

            /*
             * The dataset is all graphs. C is left unbound and the unmodified
             * access path is used.
             */

            anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

            return newJoin(left, anns, false/* defaultGraphFilter */,
                    null/* summary */, cutoffLimit, queryHints, ctx);

        }

        if (pred.get(3/* c */).isConstant()) {

            /*
             * C is already bound.
             * The unmodified access path is used.
             */

            anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

            return newJoin(left, anns, false/* defaultGraphFilter */,
                    null/* summary */, cutoffLimit, queryHints, ctx);

        }

        /*
         * Note: While a query can mix default and named graph access paths,
         * there is only one named graph collection and one default graph
         * collection within the scope of that query.
         */
        final DataSetSummary summary = dataset.getNamedGraphs();

        anns.add(new NV(Annotations.NKNOWN, summary.nknown));

        if (summary.nknown == 0) {

            /*
             * The data set is empty (no graphs). Return a join backed by an
             * empty access path.
             */

            // force an empty access path for this predicate.
            pred = (Predicate<?>) pred.setUnboundProperty(
                    IPredicate.Annotations.ACCESS_PATH_EXPANDER,
                    EmptyAccessPathExpander.INSTANCE);

            anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

            return newJoin(left, anns, false/* defaultGraphFilter */, summary,
                    cutoffLimit, queryHints, ctx);

        }

        if (summary.nknown == 1) {

            /*
             * The dataset contains exactly one graph. Bind C.
             * 
             * Note: This uses the 2 argument Constant constructor, which
             * accepts the name of the variable bound to the constant as its
             * first argument. BOpContext#bind() takes care of propagating the
             * binding onto the variable for solutions which join.
             * 
             * @see https://sourceforge.net/apps/trac/bigdata/ticket/359
             */
            pred = pred.asBound((IVariable<?>) pred.get(3),
                    new Constant<IV<?, ?>>((IVariable) pred.get(3),
                            summary.firstContext));

            anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

            return newJoin(left, anns, false/* defaultGraphFilter */, summary,
                    cutoffLimit, queryHints, ctx);

        }

        /*
         * The dataset has more than one graph: decide between a SCAN+FILTER
         * plan and a PARALLEL SUBQUERY (DataSetJoin) plan, optionally using
         * cost estimates.
         */

        final int accessPathSampleLimit = pred.getProperty(
                QueryHints.ACCESS_PATH_SAMPLE_LIMIT, ctx.accessPathSampleLimit);

        final boolean estimateCosts = accessPathSampleLimit >= 0;

        @SuppressWarnings("rawtypes")
        final IRelation r = ctx.context.getRelation(pred);

        final ScanCostReport scanCostReport;
        final SubqueryCostReport subqueryCostReport;
        final boolean scanAndFilter;

        if (estimateCosts) {

            /*
             * Estimate cost of SCAN with C unbound.
             * 
             * Note: We need to use the global index view in order to estimate
             * the cost of the scan even though the scan will be shard-wise
             * when we actually run the query.
             */
            scanCostReport = ((AccessPath) ctx.context.getAccessPath(r,
                    (Predicate<?>) pred.setProperty(
                            IPredicate.Annotations.REMOTE_ACCESS_PATH, true)))
                    .estimateCost();

            anns.add(new NV(Annotations.COST_SCAN, scanCostReport));

            /*
             * Estimate cost of SUBQUERY with C bound (sampling).
             * 
             * Note: Again, we need to use a remote index view in order to
             * estimate the cost of the subqueries even though we will use
             * sharded joins when actually running the query.
             */
            subqueryCostReport = summary.estimateSubqueryCost(ctx.context,
                    accessPathSampleLimit, (Predicate<?>) pred.setProperty(
                            IPredicate.Annotations.REMOTE_ACCESS_PATH, true));

            anns.add(new NV(Annotations.COST_SUBQUERY, subqueryCostReport));

            scanAndFilter = subqueryCostReport == null
                    || scanCostReport.cost < subqueryCostReport.cost;

        } else {

            scanCostReport = null;
            subqueryCostReport = null;
            scanAndFilter = pred.getProperty(
                    QueryHints.ACCESS_PATH_SCAN_AND_FILTER,
                    ctx.accessPathScanAndFilter);

        }
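        /*
         * Worked example (illustrative numbers only): with 5 named graphs and
         * an accessPathSampleLimit of 100, the subquery cost report is
         * extrapolated from samples of the as-bound access paths. If the full
         * scan is estimated at 1,000,000 tuples while the 5 subqueries are
         * together estimated at 50,000, the SUBQUERY (DataSetJoin) plan below
         * is chosen; otherwise we scan the index once and filter.
         */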
        if (scanAndFilter) {

            /*
             * Scan and filter. C is left unbound. We do a range scan on the
             * index and filter using an IN constraint.
             */

            // IN filter for the named graphs.
            final IElementFilter<ISPO> test = new InGraphHashSetFilter<ISPO>(
                    summary.nknown, summary.graphs);

            // layer filter onto the predicate.
            pred = pred.addIndexLocalFilter(ElementFilter.newInstance(test));

            anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

            return newJoin(left, anns, false/* defaultGraphFilter */, summary,
                    cutoffLimit, queryHints, ctx);

        } else {

            /*
             * Setup the data set join (aka parallel subquery).
             */

            // The variable to be bound.
            final IVariable<?> var = (IVariable<?>) pred.get(3);

            // The data set join.
            left = new DataSetJoin(leftOrEmpty(left),
                    new NV(DataSetJoin.Annotations.VAR, var),//
                    new NV(DataSetJoin.Annotations.BOP_ID, ctx.nextId()),//
                    new NV(DataSetJoin.Annotations.GRAPHS, summary.getGraphs()) //
            );

//            if (scaleOut) {
//                anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
//                        BOpEvaluationContext.SHARDED));
//                pred = (Predicate) pred.setProperty(
//                        Predicate.Annotations.REMOTE_ACCESS_PATH, false);
//            } else {
//                anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
//                        BOpEvaluationContext.ANY));
//            }

            anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

            return newJoin(left, anns, false/* defaultGraphFilter */, summary,
                    cutoffLimit, queryHints, ctx);

        }

    }

    /**
     * Generate a default graph join (quads mode).
     * 
     * @param ctx
     * @param left
     * @param anns
     * @param pred
     * @param dataset
     * @return
     * 
     * @todo Since we do not know the specific asBound values, but only that
     *       they will be bound, we should defer the SCAN versus SUBQUERY
     *       decision until we actually evaluate that access path. This is
     *       basically a special case of runtime query optimization.
     */
    @SuppressWarnings("rawtypes")
    private static PipelineOp defaultGraphJoin(//
            PipelineOp left, //
            final List<NV> anns, //
            Predicate<?> pred,//
            final DatasetNode dataset, //
            final Long cutoffLimit,//
            final Properties queryHints,//
            final AST2BOpContext ctx) {

        final DataSetSummary summary = dataset == null ? null
                : dataset.getDefaultGraphs();

        final boolean scaleOut = ctx.isCluster();

        if (dataset != null && summary == null) {

            pred = pred.addAccessPathFilter(StripContextFilter.newInstance());

            anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

            return newJoin(left, anns, ctx.defaultGraphDistinctFilter, summary,
                    cutoffLimit, queryHints, ctx);

        }

        if (summary != null && summary.nknown == 0) {

            /*
             * The data set is empty (no graphs). Return a join backed by an
             * empty access path.
             */

            // force an empty access path for this predicate.
            pred = (Predicate<?>) pred.setUnboundProperty(
                    IPredicate.Annotations.ACCESS_PATH_EXPANDER,
                    EmptyAccessPathExpander.INSTANCE);

            anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

            return newJoin(left, anns, false/* defaultGraphFilter */, summary,
                    cutoffLimit, queryHints, ctx);

        }

        if (summary != null && summary.nknown == 1) {

            /*
             * The dataset contains exactly one graph. Bind C. Add a filter to
             * strip off the context position.
             */

            // Bind C.
            pred = pred.asBound((IVariable<?>) pred.get(3),
                    new Constant<IV<?, ?>>(summary.firstContext));

            if (scaleOut && !ctx.remoteAPs) {
                // use a partitioned join.
                anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
                        BOpEvaluationContext.SHARDED));
                pred = (Predicate) pred.setProperty(
                        Predicate.Annotations.REMOTE_ACCESS_PATH, false);
            }

            // Strip off the context position (do not project C from the join).
            pred = pred.addAccessPathFilter(StripContextFilter.newInstance());

            anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

            return newJoin(left, anns, false/* defaultGraphFilter */, summary,
                    cutoffLimit, queryHints, ctx);

        }

        /*
         * TODO This optimization COULD be decided statically if we marked the
         * predicate with the index that would be used when it was evaluated.
         * That is known in advance EXCEPT when some joins are optional, in
         * which case the actual index can not be known until runtime. The code
         * which attaches the "as-bound" index to the predicate MUST also
         * consider the exogenous variables (if any). This might be done in the
         * static join order optimizer, which does consider each join group
         * even if it does not reorder the joins in a given group (because it
         * was disabled for that group).
         */
//        if (pred.getKeyOrder().getIndexName().endsWith("C")) {
//
//            /*
//             * C is not bound. An advancer is imposed on the AP to skip to the
//             * next possible triple after each match. Impose filter on AP to
//             * strip off the context position. Distinct filter is not required
//             * since the advancer pattern used will not report duplicates.
//             */
//
//            // Set the CURSOR flag.
//            pred = (Predicate<?>) pred.setProperty(IPredicate.Annotations.FLAGS,
//                    pred.getProperty(IPredicate.Annotations.FLAGS,
//                            IPredicate.Annotations.DEFAULT_FLAGS)
//                            | IRangeQuery.CURSOR); // @todo also READONLY
//
//            // Set Advancer (runs at the index).
//            pred = pred.addIndexLocalFilter(new ContextAdvancer());
//
//            // Filter to strip off the context position.
//            pred = pred.addAccessPathFilter(StripContextFilter.newInstance());
//
//            if (scaleOut) {
//
//                /*
//                 * When true, an ISimpleSplitHandler guarantees that no triple
//                 * on that index spans more than one shard.
//                 */
//                final SPORelation r = (SPORelation) context.getRelation(pred);
//                final boolean shardTripleConstraint = r.getContainer()
//                        .isConstrainXXXCShards();
//
//                if (shardTripleConstraint) {
//
//                    // JOIN is SHARDED.
//                    anns.add(new NV(BOp.Annotations.EVALUATION_CONTEXT,
//                            BOpEvaluationContext.SHARDED));
//
//                    // AP is LOCAL.
//                    pred = (Predicate<?>) pred.setProperty(
//                            IPredicate.Annotations.REMOTE_ACCESS_PATH, false);
//
//                } else {
//
//                    // JOIN is ANY.
//                    anns.add(new NV(BOp.Annotations.EVALUATION_CONTEXT,
//                            BOpEvaluationContext.ANY));
//
//                    // AP is REMOTE.
//                    pred = (Predicate<?>) pred.setProperty(
//                            IPredicate.Annotations.REMOTE_ACCESS_PATH, true);
//
//                }
//
//            }
//
//            return applyQueryHints(new PipelineJoin(new BOp[] { left, pred },
//                    anns.toArray(new NV[anns.size()])), queryHints);
//
//        }

        /*
         * Decide on a SCAN+FILTER or PARALLEL SUBQUERY plan for the default
         * graph AP.
         * 
         * Note: The DataSetJoin approach used for named graph access paths
         * does not work for default graph access paths because it changes the
         * scope of the DISTINCT SPO filter and winds up letting through
         * duplicate SPOs.
         * 
         * @see https://sourceforge.net/apps/trac/bigdata/ticket/407
         */

        final int accessPathSampleLimit = pred.getProperty(
                QueryHints.ACCESS_PATH_SAMPLE_LIMIT, ctx.accessPathSampleLimit);

        final boolean estimateCosts = accessPathSampleLimit >= 0;

        final ScanCostReport scanCostReport;
        final SubqueryCostReport subqueryCostReport;
        final boolean scanAndFilter;

        if (true) {

            /*
             * TODO The "DGExpander" code appears to function correctly, but it
             * can do way too much work and take way too long as the #of graphs
             * in the data set increases for at least some shapes of the data
             * and the queries. However, we currently lack a means to detect
             * cases where the PARALLEL SUBQUERY plan is faster than the
             * SCAN+FILTER. The approach coded here does not make the correct
             * decisions for reasons which seem to have more to do with the
             * data density / sparsity for the APs which would be used for
             * SCAN+FILTER versus PARALLEL SUBQUERY. Therefore the PARALLEL
             * SUBQUERY path for default graph access paths is currently
             * disabled.
             * 
             * @see https://sourceforge.net/apps/trac/bigdata/ticket/407
             */
            scanCostReport = null;
            subqueryCostReport = null;
            scanAndFilter = true;

        } else {

            if (estimateCosts) {

                /*
                 * Estimate cost of SCAN with C unbound.
                 * 
                 * Note: We need to use the global index view in order to
                 * estimate the cost of the scan regardless of whether the
                 * query runs with partitioned or global index views when it is
                 * evaluated.
                 */
                final IRelation r = ctx.context.getRelation(pred);

                scanCostReport = ((AccessPath) ctx.context.getAccessPath(r,
                        (Predicate<?>) pred.setProperty(
                                IPredicate.Annotations.REMOTE_ACCESS_PATH,
                                true))).estimateCost();

                anns.add(new NV(Annotations.COST_SCAN, scanCostReport));

                /*
                 * Estimate cost of SUBQUERY with C bound (sampling).
                 * 
                 * Note: We need to use the global index view in order to
                 * estimate the cost of the scan regardless of whether the
                 * query runs with partitioned or global index views when it is
                 * evaluated.
                 */
                subqueryCostReport = dataset == null ? null : summary
                        .estimateSubqueryCost(ctx.context,
                                accessPathSampleLimit,
                                (Predicate<?>) pred.setProperty(
                                        IPredicate.Annotations.REMOTE_ACCESS_PATH,
                                        true));

                anns.add(new NV(Annotations.COST_SUBQUERY, subqueryCostReport));

                scanAndFilter = subqueryCostReport == null
                        || scanCostReport.cost < subqueryCostReport.cost;

            } else {

                scanCostReport = null;
                subqueryCostReport = null;
                scanAndFilter = pred.getProperty(
                        QueryHints.ACCESS_PATH_SCAN_AND_FILTER,
                        ctx.accessPathScanAndFilter);

            }

        }

        if (scanAndFilter) {

            /*
             * SCAN AND FILTER. C is not bound. Unless all graphs are used,
             * layer IN filter on the AP to select for the desired graphs.
             * Layer a filter on the AP to strip off the context position.
             * Layer a DISTINCT filter on top of that.
             */

            if (dataset != null) {

                // IN filter for the named graphs.
                final IElementFilter<ISPO> test = new InGraphHashSetFilter<ISPO>(
                        summary.nknown, summary.graphs);

                // layer filter onto the predicate.
                pred = pred
                        .addIndexLocalFilter(ElementFilter.newInstance(test));

            }

            // Filter to strip off the context position.
            pred = pred.addAccessPathFilter(StripContextFilter.newInstance());

//            // Filter for distinct SPOs. (moved inside of newJoin).
//            pred = pred.addAccessPathFilter(newDistinctFilter(pred, summary));

            if (scaleOut) {

                /*
                 * Use the global index view so we can impose the distinct
                 * filter.
                 */
                anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
                        BOpEvaluationContext.ANY));
                pred = (Predicate) pred.setProperty(
                        Predicate.Annotations.REMOTE_ACCESS_PATH, true);

            } else {

                anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
                        BOpEvaluationContext.ANY));

            }

            anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

            return newJoin(left, anns, ctx.defaultGraphDistinctFilter, summary,
                    cutoffLimit, queryHints, ctx);

        } else {

            /*
             * PARALLEL SUBQUERY. Bind each value of C in turn, issuing
             * parallel subqueries against the asBound access paths using an
             * expander pattern and layer on a filter to strip off the context
             * position. The asBound access paths write on a shared buffer.
             * That shared buffer is read from by the expander.
             * 
             * Scale-out: JOIN is ANY or HASHED. AP is REMOTE.
             */

            final boolean dataSetJoin = false;

            if (dataSetJoin) {

                /*
                 * Setup the data set join (aka parallel subquery).
                 * 
                 * TODO This code path can not be made to work correctly. The
                 * problem is that we wind up with duplicate SPOs, even after
                 * filtering. This is because the scope of the DISTINCT SPO
                 * filter winds up being the JOIN against the B+Tree statement
                 * index but it would need to be ALL joins for a given source
                 * solution flowing through the DataSetJoin.
                 * 
                 * @see https://sourceforge.net/apps/trac/bigdata/ticket/407
                 */

                // The variable to be bound.
                final IVariable<?> var = (IVariable<?>) pred.get(3);

                // The data set join.
                left = new DataSetJoin(leftOrEmpty(left), new NV(
                        DataSetJoin.Annotations.VAR, var),//
                        new NV(DataSetJoin.Annotations.BOP_ID, ctx.nextId()),//
                        new NV(DataSetJoin.Annotations.GRAPHS,
                                summary.getGraphs()) //
                );

            } else {

                /*
                 * Parallel subquery using the DGExpander.
                 * 
                 * Note: See the notes on the DataSetJoin code path for why we
                 * can not use that approach for parallel subquery for default
                 * graph APs.
                 */

                final long estimatedRangeCount = subqueryCostReport.rangeCount;

                // @todo default with query hint to override and relate to
                // ClientIndexView limit in scale-out.
                final int maxParallel = 1; // PipelineJoin.Annotations.DEFAULT_MAX_PARALLEL;

                // Set subquery expander.
                pred = (Predicate<?>) pred.setUnboundProperty(
                        IPredicate.Annotations.ACCESS_PATH_EXPANDER,
                        new DGExpander(maxParallel, summary.getGraphs(),
                                estimatedRangeCount));

            }

            // Filter to strip off the context position.
            pred = pred.addAccessPathFilter(StripContextFilter.newInstance());

//            // Filter for distinct SPOs.
//            pred = pred.addAccessPathFilter(newDistinctFilter(pred, summary));

            if (scaleOut) {

                /*
                 * Use the global index view so we can impose the distinct
                 * filter.
                 */
                anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
                        BOpEvaluationContext.ANY));
                pred = (Predicate) pred.setProperty(
                        Predicate.Annotations.REMOTE_ACCESS_PATH, true);

            } else {

                anns.add(new NV(Predicate.Annotations.EVALUATION_CONTEXT,
                        BOpEvaluationContext.ANY));

            }

            anns.add(new NV(PipelineJoin.Annotations.PREDICATE, pred));

            return newJoin(left, anns, ctx.defaultGraphDistinctFilter, summary,
                    cutoffLimit, queryHints, ctx);

        }

    }

    /**
     * Return the distinct filter used for a default graph join (distinct
     * SPOs).
     * <p>
     * Note: The native memory based DISTINCT filter MUST NOT be used for
     * pipelined joins. Pipelined joins run "as-bound" and the per-as-bound
     * cardinality is typically very small (1s to 1000s). However, a hash join
     * can hit a very large cardinality for the default graph distinct filter
     * since it sees all SPOs at once.
     * 
     * @param ctx
     *            The evaluation context.
     * @param pred
     *            The predicate.
     * @param summary
     *            The {@link DataSetSummary} (when available).
     * @param hashJoin
     *            <code>true</code> iff a hash join was chosen for this
     *            predicate.
     * @return
     */
    static private BOpFilterBase newDistinctFilter(final AST2BOpContext ctx,
            final Predicate<?> pred, final DataSetSummary summary,
            final boolean hashJoin) {

        // Never use native distinct for as-bound "pipeline" joins.
        boolean nativeDistinct = hashJoin && ctx.nativeDistinctSPO;

        if (nativeDistinct) {

            /*
             * Examine the cardinality of the predicate to determine whether or
             * not we should use a DISTINCT SPO filter backed by a persistence
             * capable data structure against native memory.
             */
            final Long rangeCount = (Long) pred
                    .getProperty(Annotations.ESTIMATED_CARDINALITY);

            if (rangeCount != null) {

                if (rangeCount.longValue() < ctx.nativeDistinctSPOThreshold) {

                    // Small range count.
                    nativeDistinct = false;

                }

            } else {

                log.warn("No rangeCount? : " + pred);

            }

        }
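        /*
         * Illustrative example (numbers hypothetical): with a
         * nativeDistinctSPOThreshold of 10000, a predicate whose estimated
         * cardinality is 500 falls back to the JVM-based DistinctFilter, while
         * a hash join over a predicate with a 2B range count would select the
         * native-memory filter below.
         */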
        /*
         * Note: I think that the predicate cardinality is probably much more
         * important than the #of different contexts in the default graph. You
         * can have two contexts and 2B cardinality on the range count and wind
         * up with a DISTINCT 2B SPOs. Therefore I have disabled the following
         * code path.
         */
        if (false && nativeDistinct && summary != null) {

            /*
             * Examine the cardinality of the defaultGraph *contexts*.
             */
            if (summary.nknown < ctx.nativeDistinctSPOThreshold) {

                // Only a few graphs in the defaultGraph.
                nativeDistinct = false;

            }

        }

        if (nativeDistinct) {

            /*
             * The index that will be used to read on the B+Tree access path.
             */
            @SuppressWarnings({ "unchecked", "rawtypes" })
            final SPOKeyOrder indexKeyOrder = SPOKeyOrder.getKeyOrder(
                    (IPredicate) pred, 4/* keyArity */);

            // Native memory based DISTINCT filter.
            return NativeDistinctFilter.newInstance(indexKeyOrder);

        } else {

            // JVM Based DISTINCT filter.
            return DistinctFilter.newInstance();

        }

    }

    /**
     * Create and return an appropriate type of join. The default is the
     * pipeline join. A hash join can be selected using the appropriate query
     * hint. The query hints which control this decision must be annotated on
     * the {@link IPredicate} by the caller.
     * 
     * @param left
     * @param anns
     * @param defaultGraphFilter
     *            <code>true</code> iff a DISTINCT filter must be imposed on
     *            the SPOs. This is never done for a named graph query. It is
     *            normally done for default graph queries, but there are some
     *            edge cases where the SPOs are provably distinct and we do not
     *            need to bother.
     * @param summary
     *            The {@link DataSetSummary} (when available).
     * @param queryHints
     *            The query hints from the dominating operator context.
     * @param ctx
     *            The evaluation context.
     * @return
     * 
     * @see Annotations#HASH_JOIN
     * @see HashJoinAnnotations#JOIN_VARS
     * @see Annotations#ESTIMATED_CARDINALITY
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    static private PipelineOp newJoin(//
            PipelineOp left, //
            final List<NV> anns,//
            final boolean defaultGraphFilter,//
            final DataSetSummary summary,//
            final Long cutoffLimit,//
            final Properties queryHints, //
            final AST2BOpContext ctx) {

        // Convert join annotations to a map so we can look things up.
        final Map<String, Object> map = NV.asMap(anns.toArray(new NV[anns
                .size()]));

        // Look up the predicate for the access path.
        Predicate<?> pred = (Predicate<?>) map
                .get(AccessPathJoinAnnotations.PREDICATE);

        // True iff a hash join was requested for this predicate.
        final boolean hashJoin = cutoffLimit == null
                && pred.getProperty(QueryHints.HASH_JOIN,
                        QueryHints.DEFAULT_HASH_JOIN);

        if (cutoffLimit != null) {

            /*
             * Cutoff join (RTO).
             */

            /*
             * true iff there are no variable materialization requirements for
             * this join.
             */
            final boolean simpleJoin = ((Boolean) map
                    .get(Annotations.SIMPLE_JOIN)).booleanValue()
                    && !AST2BOpRTO.runAllJoinsAsComplexJoins;

            // disallow reordering of solutions by the query engine.
            map.put(PipelineJoin.Annotations.REORDER_SOLUTIONS, Boolean.FALSE);

            // disallow parallel evaluation of tasks.
            map.put(PipelineOp.Annotations.MAX_PARALLEL, Integer.valueOf(1));

            // disallow parallel evaluation of chunks.
            map.put(PipelineJoin.Annotations.MAX_PARALLEL_CHUNKS,
                    Integer.valueOf(0));

            // disable access path coalescing.
            map.put(PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS,
                    Boolean.FALSE);

            /*
             * Disable access path reordering.
             * 
             * Note: Reordering must be disabled for complex joins since we
             * will correlate the input solutions and output solutions using a
             * row identifier. If the solutions are reordered as they flow
             * through the pipeline, then it will break this correlation and we
             * will no longer have accurate information about the #of input
             * solutions required to produce a given number of output
             * solutions. [Simple joins might not have this requirement since
             * the PipelineJoin is internally doing the accounting for the #of
             * solutions in and out of the join.]
             */
            map.put(PipelineJoin.Annotations.REORDER_ACCESS_PATHS,
                    Boolean.FALSE);

            if (simpleJoin) {

//                // disable access path coalescing
//                map.put(PipelineJoin.Annotations.COALESCE_DUPLICATE_ACCESS_PATHS,
//                        Boolean.FALSE);

                /*
                 * Note: We need to annotate the JOIN operator to eliminate
                 * parallelism, eliminate access path coalescing, and limit the
                 * output of the join.
                 */

                // cutoff join.
                map.put(PipelineJoin.Annotations.LIMIT,
                        Long.valueOf(cutoffLimit));

                /*
                 * Note: In order to have an accurate estimate of the join hit
                 * ratio we need to make sure that the join operator runs using
                 * a single PipelineJoinStats instance which will be visible to
                 * us when the query is cutoff. In turn, this implies that the
                 * join must be evaluated on the query controller.
                 * 
                 * FIXME RTO: This implies that sampling of scale-out joins
                 * must be done using remote access paths. This assumption and
                 * approach needs to be reviewed. This is probably NOT the case
                 * if we are using a complex pipeline (i.e., with chunked
                 * materialization of some variables and/or conditional routing
                 * operations). In fact, if the pipeline is complex, we do not
                 * want to set LIMIT on the JOIN since that could cause the
                 * pipeline to underproduce if the filters wind up eliminating
                 * some solutions. This suggests that we either need to treat
                 * all cutoff joins as the general case and NOT put the LIMIT
                 * on the JOIN -or- we need to pass in more information so
                 * newJoin() understands whether it will be required to impose
                 * the cutoffLimit or whether that limit will be imposed by a
                 * SLICE and injecting a column to correlate input and output
                 * solutions.
                 */
                map.put(PipelineJoin.Annotations.SHARED_STATE, Boolean.TRUE);//
                map.put(PipelineJoin.Annotations.EVALUATION_CONTEXT,
                        BOpEvaluationContext.CONTROLLER);//

            } else {

                /*
                 * Complex join.
                 * 
                 * Note: Complex joins may include operators for the
                 * materialization of IVs as RDF Values and evaluate FILTERs
                 * that must (or might) operate on RDF Values.
                 * 
                 * FIXME RTO: Are there additional predicate annotations that
                 * we need to override if we are generating a complex query
                 * plan to evaluate the cutoff JOIN?
                 */

                /*
                 * FIXME RTO: This appears to be necessary to get reliable
                 * reporting for the sum of the fast range counters over the
                 * APs and the #of tuples read. Why? If we need to use shared
                 * state for reliable computation of cutoff joins then we must
                 * use remote APs for scale-out.
                 */
                map.put(PipelineJoin.Annotations.SHARED_STATE, Boolean.TRUE);//

            }

        } // cutoffJoin

        if (defaultGraphFilter) {

            /*
             * Filter for distinct SPOs.
             */

            pred = pred.addAccessPathFilter(newDistinctFilter(ctx, pred,
                    summary, hashJoin));

            // Update the annotation map with the predicate now that we have
            // attached the appropriate distinct filter.
            map.put(AccessPathJoinAnnotations.PREDICATE, pred);

        }

        if (hashJoin) {

            /*
             * TODO Choose HTree versus JVM hash join operator based on the
             * estimated input cardinality to the join. The RTO can give us
             * that (the static join optimizer does not really provide a decent
             * estimate of the input/output cardinality of a join).
             * 
             * TODO If we partition the hash join on a cluster then we should
             * divide the estimated input cardinality by the #of partitions to
             * get the estimated input cardinality per partition.
             */

//            final long estimatedInputCardinality = Long.MAX_VALUE;
//
//            final boolean useHTree = estimatedInputCardinality > 20 * Bytes.megabyte;

            final boolean useHTree = ctx.nativeHashJoins;

            /*
             * The join variable(s) are variables which are (a) bound by the
             * predicate and (b) are known bound in the source solutions.
             */
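            /*
             * Example (hypothetical bindings): for the pattern (?s rdf:type
             * ?type) where upstream operators have already bound ?s, the
             * annotated join variables would be [?s], and the hash join
             * buckets both the source solutions and the access path solutions
             * on ?s.
             */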
            final IVariable<?>[] joinVars = (IVariable<?>[]) pred
                    .getRequiredProperty(HashJoinAnnotations.JOIN_VARS);

            map.put(HashJoinAnnotations.JOIN_VARS, joinVars);

            /*
             * Publish the NamedSolutionSetRef. This makes updates in the state
             * of the hash join visible from the NanoSparqlServer. (Both
             * versions of the operator require this attribute.)
             * 
             * BLZG-1608: this needs to be locally scoped, since the named
             * subquery may be executed multiple times (e.g., when part of an
             * inner subquery executed through the
             * PipelinedHashIndexAndSolutionSetJoinOp).
             */
            map.put(HashJoinOp.Annotations.NAMED_SET_REF,
                    NamedSolutionSetRefUtility.newInstance(null, "--namedSet-"
                            + ctx.nextId(), joinVars));

            /*
             * Choose the evaluation context.
             * 
             * Note: On a cluster this MUST be consistent with the decision
             * made for handling named and default graphs, except that we are
             * free to choose either SHARDED or HASHED for a hash join. Also,
             * while the pipeline join can use ANY on a cluster, the hash joins
             * MUST run on the controller or be either sharded or hash
             * partitioned.
             */
            BOpEvaluationContext evaluationContext = (BOpEvaluationContext) map
                    .get(BOp.Annotations.EVALUATION_CONTEXT);

            if (evaluationContext == null) {
                // TODO Should be SHARDED or HASHED on a cluster.
                evaluationContext = BOpEvaluationContext.CONTROLLER;
            } else if (evaluationContext == BOpEvaluationContext.ANY) {
                // ANY is not permitted for a hash join.
                evaluationContext = BOpEvaluationContext.CONTROLLER;
            }

            if (evaluationContext == BOpEvaluationContext.CONTROLLER) {
                // This is not necessary, but it makes the hash join stats
                // immediately visible.
                map.put(PipelineOp.Annotations.SHARED_STATE, true);
            }

            map.put(BOp.Annotations.EVALUATION_CONTEXT, evaluationContext);

            map.put(PipelineOp.Annotations.MAX_PARALLEL, 1);

            if (useHTree) {

                map.put(PipelineOp.Annotations.MAX_MEMORY, Long.MAX_VALUE);

                map.put(PipelineOp.Annotations.LAST_PASS, true);

                map.put(HTreeHashJoinAnnotations.RELATION_NAME,
                        pred.getRequiredProperty(Predicate.Annotations.RELATION_NAME));

                left = new HTreeHashJoinOp(leftOrEmpty(left), map);

            } else {

                map.put(PipelineOp.Annotations.PIPELINED, false);

                left = new JVMHashJoinOp(leftOrEmpty(left), map);

            }

        } else {

            left = new PipelineJoin(leftOrEmpty(left), map);

        }

        left = applyQueryHints(left, queryHints, ctx);

        return left;

    }

}