/** Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on Sep 15, 2011 */ package com.bigdata.rdf.sparql.ast.optimizers; import java.util.LinkedList; import java.util.List; import java.util.Properties; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.aggregate.IAggregate; import com.bigdata.rdf.sparql.ast.ASTBase; import com.bigdata.rdf.sparql.ast.GraphPatternGroup; import com.bigdata.rdf.sparql.ast.IGroupMemberNode; import com.bigdata.rdf.sparql.ast.IGroupNode; import com.bigdata.rdf.sparql.ast.IQueryNode; import com.bigdata.rdf.sparql.ast.JoinGroupNode; import com.bigdata.rdf.sparql.ast.NamedSubqueriesNode; import com.bigdata.rdf.sparql.ast.NamedSubqueryInclude; import com.bigdata.rdf.sparql.ast.NamedSubqueryRoot; import com.bigdata.rdf.sparql.ast.QueryNodeWithBindingSet; import com.bigdata.rdf.sparql.ast.QueryRoot; import com.bigdata.rdf.sparql.ast.QueryType; import com.bigdata.rdf.sparql.ast.StaticAnalysis; import com.bigdata.rdf.sparql.ast.SubqueryRoot; import com.bigdata.rdf.sparql.ast.UnionNode; import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; import com.bigdata.rdf.sparql.ast.service.ServiceNode; import cutthecrap.utils.striterators.Striterator; /** * Lift {@link SubqueryRoot}s into named subqueries when appropriate. This * includes the following cases: * <ul> * <li>Lift out SPARQL 1.1 subqueries which use both LIMIT and ORDER BY. Due to * the interaction of the LIMIT and ORDER BY clause, these subqueries MUST be * run first since they can produce different results if they are run * "as-bound".</li> * <li>Lift out SPARQL 1.1 subqueries involving aggregates. This typically * provides more efficient evaluation than repeated as-bound evaluation of the * sub-select. It also prevents inappropriate sharing of the internal state of * the {@link IAggregate} functions.</li> * <li>Lift out SPARQL 1.1 subqueries if there are no incoming bound variables * which are also projected by the subquery. Such subqueries must be lifted or * we will simply be doing the same work over and over since no bindings will be * projected into the subquery.</li> * <li>Lift out SPARQL 1.1 subqueries if {@link SubqueryRoot#isRunOnce()} is * <code>true</code>. * </ul> * * FIXME The code to lift out sub-selects if there are no join variables has * been disabled since we are not correctly computing the join variables at this * point. Deciding the join variables requires that we either apply heuristics * or the RTO to decide on a join ordering. Once the join order is known we can * then recognize which variables will be definitely bound at the point in the * join group where the subquery would be evaluated. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id: ASTSparql11SubqueryOptimizer.java 5193 2011-09-15 14:18:56Z * thompsonbry $ */ public class ASTSparql11SubqueryOptimizer implements IASTOptimizer { @Override public QueryNodeWithBindingSet optimize( final AST2BOpContext context, final QueryNodeWithBindingSet input) { final IQueryNode queryNode = input.getQueryNode(); final IBindingSet[] bindingSets = input.getBindingSets(); final QueryRoot queryRoot = (QueryRoot) queryNode; final StaticAnalysis sa = new StaticAnalysis(queryRoot, context); // First, process any pre-existing named subqueries. { final NamedSubqueriesNode namedSubqueries = queryRoot .getNamedSubqueries(); if (namedSubqueries != null) { // Note: works around concurrent modification error. final List<NamedSubqueryRoot> list = BOpUtility.toList( namedSubqueries, NamedSubqueryRoot.class); for (NamedSubqueryRoot namedSubquery : list) { liftSubqueries(context, sa, namedSubquery.getWhereClause()); } } } // Now process the main where clause. liftSubqueries(context, sa, queryRoot.getWhereClause()); if(false) { /* * Note: This may be enabled to lift all SPARQL 1.1 subqueries into * named subqueries. However, I think that the better way to handle * this is to run the subqueries either as-bound "chunked" or with * ALL solutions from the parent as their inputs (the extreme case * of chunked). This can also be applied to handling OPTIONAL groups. * * @see https://sourceforge.net/apps/trac/bigdata/ticket/377 */ rewriteSparql11Subqueries(context, sa, queryRoot); } return new QueryNodeWithBindingSet(queryRoot, bindingSets); } /** * Apply all optimizations. */ private void liftSubqueries(final AST2BOpContext context, final StaticAnalysis sa, final GraphPatternGroup<IGroupMemberNode> group) { final int arity = group.arity(); for (int i = 0; i < arity; i++) { final BOp child = (BOp) group.get(i); if (child instanceof GraphPatternGroup<?>) { /* * Note: Do recursion *before* we do the rewrite so we will * rewrite Sub-Sub-Selects. * * FIXME Unit test for sub-sub-select optimization. */ liftSubqueries(context, sa, ((GraphPatternGroup<IGroupMemberNode>) child)); } else if (child instanceof SubqueryRoot) { // Recursion into subqueries. final SubqueryRoot subqueryRoot = (SubqueryRoot) child; liftSubqueries(context, sa, subqueryRoot.getWhereClause()); }else if (child instanceof ServiceNode) { // Do not rewrite things inside of a SERVICE node. continue; } if (!(child instanceof SubqueryRoot)) { continue; } final SubqueryRoot subqueryRoot = (SubqueryRoot) child; if (subqueryRoot.getQueryType() == QueryType.ASK) { /* * FIXME Look at what would be involved in lifting an ASK * sub-query. There are going to be at least two cases. If there * is no join variable, then we always want to lift the ASK * sub-query as it is completely independent of the parent * group. If there is a join variable, then we need to project * solutions which include the join variables from the subquery * and the "ASK". At that point we can hash join against the * projected solutions and the ASK succeeds if the hash join * succeeds. [Add unit tests for this too.] */ continue; } if (needsLifting(subqueryRoot)) { liftSparql11Subquery(context, sa, subqueryRoot); continue; } /* * FIXME We can not correctly predict the join variables at this * time because that depends on the actual evaluation order. This * has been commented out for now because it will otherwise cause * all sub-selects to be lifted out. */ // if(false) { // final Set<IVariable<?>> joinVars = sa.getJoinVars( // subqueryRoot, new LinkedHashSet<IVariable<?>>()); // // if (joinVars.isEmpty()) { // // /* // * Lift out SPARQL 1.1 subqueries for which the RUN_ONCE // * annotation was specified. // */ // // liftSparql11Subquery(context, sa, subqueryRoot); // // continue; // // } // } } } /** * Returns true iff the subquery needs to be lifted into a named subquery include. * * @param subqueryRoot * @return */ public static boolean needsLifting(final SubqueryRoot subqueryRoot) { if (subqueryRoot==null) return false; boolean needsLifting = false; /* * A. Lift out SPARQL 1.1 subqueries which use LIMIT and/or OFFSET. * * The SliceOp in the subquery will cause the IRunningQuery in * which it appears to be interrupted. Therefore, when a SLICE * is required for a subquery we need to lift it out to run it * as a named subquery. * * TODO There may well be other cases that we have to handle * with as-bound evaluation of a Subquery with a LIMIT/OFFSET. * If so, then the subquery will have to be run using the * SubqueryOp. */ needsLifting |= subqueryRoot.hasSlice(); /* * Lift out SPARQL 1.1 subqueries which use both LIMIT and ORDER * BY. Due to the interaction of the LIMIT and ORDER BY clause, * these subqueries MUST be run first since they can produce * different results if they are run "as-bound". * * NOTE: this is redundant with the previous case and outcommented. */ // needsLifting |= subqueryRoot.hasSlice() && subqueryRoot.getOrderBy() != null; /* * Lift out SPARQL 1.1 subqueries which use {@link IAggregate}s. * This typically provides more efficient evaluation than * repeated as-bound evaluation of the sub-select. It also * prevents inappropriate sharing of the internal state of the * {@link IAggregate} functions. */ needsLifting |= StaticAnalysis.isAggregate(subqueryRoot); /* * Lift out SPARQL 1.1 subqueries for which the RUN_ONCE * annotation was specified. */ needsLifting |= subqueryRoot.isRunOnce(); return needsLifting; } private void rewriteSparql11Subqueries(final AST2BOpContext context, final StaticAnalysis sa, final QueryRoot queryRoot) { final Striterator itr2 = new Striterator( BOpUtility.postOrderIterator((BOp) queryRoot.getWhereClause())); itr2.addTypeFilter(SubqueryRoot.class); final List<SubqueryRoot> subqueries = new LinkedList<SubqueryRoot>(); while (itr2.hasNext()) { subqueries.add((SubqueryRoot)itr2.next()); } for(SubqueryRoot subquery : subqueries) { liftSparql11Subquery(context, sa, subquery); } } private void liftSparql11Subquery(final AST2BOpContext context, final StaticAnalysis sa, final SubqueryRoot subqueryRoot) { final IGroupNode<?> parent = subqueryRoot.getParent(); final String newName = "-subSelect-" + context.nextId(); final NamedSubqueryInclude include = new NamedSubqueryInclude(newName); /** * Set query hints from the parent join group. * * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/791" > * Clean up query hints </a> */ include.setQueryHints((Properties) parent .getProperty(ASTBase.Annotations.QUERY_HINTS)); /** * Copy across attached join filters. * * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/796" * >Filter assigned to sub-query by query generator is dropped from * evaluation</a> */ include.setAttachedJoinFilters(subqueryRoot.getAttachedJoinFilters()); /* * Note: A SubqueryRoot normally starts out as the sole child of a * JoinGroupNode. However, other rewrites may have written out that * JoinGroupNode and it does not appear to be present for an ASK * subquery. * * Therefore, when the parent of the SubqueryRoot is a JoinGroupNode * having the SubqueryRoot as its only child, we use the parent's parent * in order to replace the JoinGroupNode when we lift out the * SubqueryRoot. Otherwise we use the parent since there is no wrapping * JoinGroupNode (or if there is, it has some other stuff in there as * well). * * BLZG-1542 -> there is an additional thing we need to take care of: * whenever the parent node is an OPTIONAL or MINUS, we * must not remove it, otherwise we would just "drop" an * OPTIONAL or MINUS, thus changing the query's semantics * */ if ((parent instanceof JoinGroupNode) && !((JoinGroupNode)parent).isOptional() && !((JoinGroupNode)parent).isMinus() && ((BOp) parent).arity() == 1 && parent.getParent() != null && !((IGroupNode<?>)parent.getParent() instanceof UnionNode)) { final IGroupNode<IGroupMemberNode> pp = parent.getParent(); // Replace the sub-select with the include. if (((ASTBase) pp).replaceWith((BOp) parent, include) == 0) throw new AssertionError(); } else { // Replace the sub-select with the include. if (((ASTBase) parent).replaceWith((BOp) subqueryRoot, include) == 0) throw new AssertionError(); } final NamedSubqueryRoot nsr = new NamedSubqueryRoot( subqueryRoot.getQueryType(), newName); /** * Copy across query hints from the original subquery. * * @see <a href="http://sourceforge.net/apps/trac/bigdata/ticket/791" > * Clean up query hints </a> */ nsr.setQueryHints(subqueryRoot.getQueryHints()); nsr.setConstruct(subqueryRoot.getConstruct()); nsr.setGroupBy(subqueryRoot.getGroupBy()); nsr.setHaving(subqueryRoot.getHaving()); nsr.setOrderBy(subqueryRoot.getOrderBy()); nsr.setProjection(subqueryRoot.getProjection()); nsr.setSlice(subqueryRoot.getSlice()); nsr.setWhereClause(subqueryRoot.getWhereClause()); nsr.setBindingsClause(subqueryRoot.getBindingsClause()); sa.getQueryRoot().getNamedSubqueriesNotNull().add(nsr); } }