ASTJoinOrderByTypeOptimizer.java example

Explorer
blazegraph-master
- database-master
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Sep 10, 2011
 */

package com.bigdata.rdf.sparql.ast.optimizers;

import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import org.openrdf.model.URI;

import com.bigdata.bop.BOp;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IValueExpression;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.sparql.ast.ArbitraryLengthPathNode;
import com.bigdata.rdf.sparql.ast.AssignmentNode;
import com.bigdata.rdf.sparql.ast.BindingsClause;
import com.bigdata.rdf.sparql.ast.GraphPatternGroup;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.IJoinNode;
import com.bigdata.rdf.sparql.ast.JoinGroupNode;
import com.bigdata.rdf.sparql.ast.NamedSubqueryInclude;
import com.bigdata.rdf.sparql.ast.PropertyPathUnionNode;
import com.bigdata.rdf.sparql.ast.QueryHints;
import com.bigdata.rdf.sparql.ast.QueryType;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.StaticAnalysis;
import com.bigdata.rdf.sparql.ast.SubqueryRoot;
import com.bigdata.rdf.sparql.ast.ZeroLengthPathNode;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpUtility;
import com.bigdata.rdf.sparql.ast.service.ServiceFactory;
import com.bigdata.rdf.sparql.ast.service.ServiceNode;
import com.bigdata.rdf.sparql.ast.service.ServiceRegistry;

/**
 * This optimizer simply puts each type of {@link IGroupMemberNode} within a
 * {@link JoinGroupNode} in the right order with respect to the other types.
 * <p>
 * TODO TEST SUITE!
 * <p>
 * This optimizer is deprecated. It can be enabled using the query hint
 * {@link QueryHints#DEFAULT_OLD_JOIN_ORDER_OPTIMIZER}. The new optimizer
 * replacing this one is the {@link ASTJoinGroupOrderOptimizer}.
 * <p>
 * Note: This class only implements the per-group hook
 * {@link #optimizeJoinGroup(AST2BOpContext, StaticAnalysis, IBindingSet[], JoinGroupNode)}.
 * The traversal which visits the join groups of a query is presumably
 * provided by {@link AbstractJoinGroupOptimizer} (not visible here).
 *
 * @deprecated Replaced by {@link ASTJoinGroupOrderOptimizer}.
 */
@Deprecated
public class ASTJoinOrderByTypeOptimizer extends AbstractJoinGroupOptimizer
        implements IASTOptimizer {

    /**
     * Rewrite the children of the join group, in place, so that they appear
     * in the following order (this is the order in which the code below
     * appends them):
     * <pre>
     *  1. VALUES (BindingsClause) nodes
     *  2. Assignments whose value expression is a constant
     *  3. Pre-filters
     *  4. In-filters
     *
     *     Required joins:
     *
     *  5. Service calls whose (constant) service URI resolves to a
     *     registered service which is flagged "run first"
     *     (e.g. Bigdata SEARCH)
     *  6. Named subquery includes
     *  7. Non-optional statement patterns and property path nodes
     *  8. Join filters
     *  9. SPARQL 1.1 subqueries (other than ASK subqueries)
     * 10. Non-optional subgroups (join groups and UNION)
     * 11. Remaining service calls with a constant service reference
     * 12. Remaining service calls (variable service reference)
     *
     * 13. ASK subqueries (FILTER EXISTS / FILTER NOT EXISTS)
     *
     *     Optional joins:
     *
     * 14. Simple optionals (optional statement patterns) and optional
     *     subgroups, interleaved in their original order
     *
     * 15. Remaining (non-constant) assignments
     * 16. Post-filters
     * </pre>
     * TODO: the placement of OPTIONALS should really be more complicated
     * than this, e.g. consider interaction with SERVICE calls etc.
     * <p>
     * Most of this logic was lifted out of {@link AST2BOpUtility}.
     * <p>
     * Note: Join filters are now attached to {@link IJoinNode}s.
     *
     * @param ctx
     *            The evaluation context. Used to decide whether the static
     *            optimizer is in effect for this join group.
     * @param sa
     *            The static analysis object. Used to obtain the pre-, join-
     *            and post- filters for the join group.
     * @param bSets
     *            The exogenous binding sets (not used by this method).
     * @param joinGroup
     *            The join group whose children will be reordered in place.
     *
     * @throws AssertionError
     *             if the number of nodes collected differs from the arity of
     *             the join group, i.e., some child was not classified (or was
     *             classified more than once).
     */
    @Override
    protected void optimizeJoinGroup(final AST2BOpContext ctx,
            final StaticAnalysis sa, final IBindingSet[] bSets,
            final JoinGroupNode joinGroup) {

        if (!ASTStaticJoinOptimizer.isStaticOptimizer(ctx, joinGroup)) {
            // Nothing to do unless the static optimizer applies to this group.
            return;
        }

        // The children of the join group in their new evaluation order.
        final List<IGroupMemberNode> ordered = new LinkedList<IGroupMemberNode>();

        // Entries are removed from this list once they have been scheduled.
        final List<AssignmentNode> assignments = joinGroup.getAssignments();

        // Entries are removed from this list once they have been scheduled.
        final List<ServiceNode> serviceNodes = joinGroup.getServiceNodes();

        // ASK subqueries are set aside and scheduled after the required joins.
        final List<SubqueryRoot> askSubqueries = new LinkedList<SubqueryRoot>();

        /*
         * VALUES clauses go first.
         */
        for (BindingsClause values : joinGroup.getChildren(BindingsClause.class)) {

            ordered.add(values);

        }

        /*
         * Assignments for a constant.
         * 
         * Note: This supports query engines which use BIND() to convey
         * a binding into a remote SPARQL end point (openrdf does this).
         * For example, see their service09 test.
         */
        {

            final Iterator<AssignmentNode> aitr = assignments.iterator();

            while (aitr.hasNext()) {

                final AssignmentNode n = aitr.next();

                @SuppressWarnings("rawtypes")
                final IValueExpression<? extends IV> valExpr = n
                        .getValueExpression();

                if (valExpr instanceof IConstant) {

                    ordered.add(n);

                    // Scheduled: do not add again with the other assignments.
                    aitr.remove();

                }

            }

        }

        /*
         * Add the pre-conditionals to the pipeline.
         * 
         * TODO These filters should be lifted into the parent group (by a
         * rewrite rule) so we can avoid starting a subquery only to have it
         * failed by a filter. We will do less work if we fail the solution in
         * the parent group.
         */
        for (IGroupMemberNode n : sa.getPreFilters(joinGroup)) {

            ordered.add(n);

        }

        /*
         * FIXME We need to move away from the DataSetJoin class and replace it
         * with an IPredicate to which we have attached an inline access path.
         * That transformation needs to happen in a rewrite rule, which means
         * that we will wind up removing the IN filter and replacing it with an
         * AST node for that inline AP (something conceptually similar to a
         * statement pattern but for a column projection of the variable for the
         * IN expression). That way we do not have to magically "subtract" the
         * known "IN" filters out of the join- and post- filters.
         * 
         * @see https://sourceforge.net/apps/trac/bigdata/ticket/233 (Replace
         * DataSetJoin with an "inline" access path.)
         * 
         * @see JoinGroupNode#getInFilters()
         */
        for (IGroupMemberNode n : joinGroup.getInFilters()) {

            ordered.add(n);

        }

        /*
         * Required joins and non-optional subqueries.
         * 
         * Note: SPARQL 1.1 style subqueries are currently always pipelined.
         * Like named subquery includes, they are also never optional. However,
         * there is no a-priori reason why we should run pipelined subqueries
         * before named subquery includes and, really, no reason why we can not
         * mix these with the required joins (above). I believe that this is
         * being done solely for expediency (because the static query optimizer
         * can not handle it).
         * 
         * Also, note that named subquery includes are hash joins. We have an
         * index. If the operator supported cutoff evaluation then we could
         * easily reorder them with the other required joins using the RTO.
         * 
         * Ditto for pipelined SPARQL 1.1 subquery. If it supported cutoff
         * evaluation, then the RTO could reorder them with the required joins.
         * This is even true when the subquery uses GROUP BY or ORDER BY, which
         * imply the use of at once operators. While we must fully materialize
         * the solutions for each evaluation of the subquery, the evaluation is
         * based on the as-bound solutions flowing into the subquery. If the
         * subquery is unselective, then clearly this will be painful and it
         * might be better to lift such unselective subqueries into named
         * subqueries in order to obtain a hash index over the entire subquery
         * solution set when evaluated with an empty source binding set.
         * 
         * Note: This logic was originally constructed before we had required
         * joins other than on a statement pattern. This shaped how the FILTERs
         * were attached and how the materialization pipeline was generated in
         * order to have materialized RDF Values on hand for those FILTERs.
         * 
         * We now have several kinds of required joins: pipelined statement
         * pattern joins, SPARQL 1.1 subquery, named subquery include, subquery
         * hash joins (when the subquery is optional), service call joins, etc.
         * 
         * FIXME The code currently only handles the FILTER attachment and
         * materialization pipeline for the required statement pattern joins.
         * However, for efficiency, FILTERs MUST be attached to these joins as
         * appropriate for ALL CASES and variables MUST be materialized as
         * required for those filters to run.
         * 
         * FIXME All of these joins can be reordered by either static analysis
         * of cardinality (which has not been extended to handle this yet) or by
         * the RTO. The filter attachment decisions (and the materialization
         * pipeline generation) needs to be deferred until we actually evaluate
         * the join graph (at least for the RTO).
         */

        { // begin required joins.

            /*
             * Run some service calls first (or as early as possible). Only
             * services with a constant service URI which resolves to a
             * registered ServiceFactory whose options say "run first" are
             * scheduled here. All other service nodes remain in
             * [serviceNodes] and are scheduled after the required joins.
             */
            {

                final Iterator<ServiceNode> sitr = serviceNodes.iterator();

                while (sitr.hasNext()) {

                    final ServiceNode n = sitr.next();

                    if (!n.getServiceRef().isConstant()) {
                        // Variable service reference: can not be resolved here.
                        continue;
                    }

                    final URI serviceURI = (URI) n.getServiceRef().getValue();

                    final ServiceFactory f = ServiceRegistry.getInstance()
                            .get(serviceURI);

                    /*
                     * Note that the query hint can be used to override the
                     * service defaults.
                     */
                    if (f != null && f.getServiceOptions().isRunFirst()) {

                        ordered.add(n);

                        // Scheduled: do not add again with the later services.
                        sitr.remove();

                    }

                }

            }

            /*
             * Add joins against named solution sets from WITH AS INCLUDE style
             * subqueries for which there are NO join variables. Such includes
             * will be a cross product so we want to run them as early as
             * possible.
             * 
             * Note: This corresponds to a very common use case where the named
             * subquery is used to constrain the remainder of the join group.
             * 
             * Note: If there ARE join variables then the named subquery include
             * MUST NOT be run until after the join variables have been bound.
             * Failure to observe this rule will cause the unbound variable to
             * be included when computing the hash code of a solution and the
             * join will not produce the correct solutions. [If it is desired to
             * always run named subqueries first then you need to make sure that
             * the join variables array is empty for the INCLUDE.]
             * 
             * NOTE(review): The loop below adds EVERY NamedSubqueryInclude at
             * this position; it does not test whether the include has join
             * variables. Confirm whether that is the intended behavior.
             */
            for (IGroupMemberNode child : joinGroup) {

                if (child instanceof NamedSubqueryInclude) {

                    ordered.add(child);

                }

            }

            /*
             * Add required statement pattern joins and property path stuff.
             * 
             * Note: All property path nodes (arbitrary length path, zero
             * length path, and property path union) are added here without
             * testing whether or not they are optional.
             */
            for (IGroupMemberNode child : joinGroup) {

                if (child instanceof StatementPatternNode) {

                    final StatementPatternNode sp = (StatementPatternNode) child;

                    if (!sp.isOptional()) {

                        ordered.add(child);

                    }

                } else if (child instanceof ArbitraryLengthPathNode
                        || child instanceof ZeroLengthPathNode
                        || child instanceof PropertyPathUnionNode) {

                    ordered.add(child);

                }

            }

            /*
             * TODO Why is this here?!? It should either be empty or run
             * after the last required join, right?
             */
            for (IGroupMemberNode n : sa.getJoinFilters(joinGroup)) {

                ordered.add(n);

            }

            /*
             * Add SPARQL 1.1 style subqueries which were not lifted out into
             * named subqueries.
             */
            for (IGroupMemberNode child : joinGroup) {

                if (!(child instanceof SubqueryRoot)) {
                    continue;
                }

                final SubqueryRoot subquery = (SubqueryRoot) child;

                if (subquery.getQueryType() == QueryType.ASK) {
                    /**
                     * ASK subqueries are used for FILTER EXISTS and FILTER
                     * NOT EXISTS. They can not be run before the required
                     * join groups, so they are set aside and added after the
                     * required joins.
                     * 
                     * @see <a
                     *      href="https://sourceforge.net/apps/trac/bigdata/ticket/515">
                     *      Query with two "FILTER NOT EXISTS" expressions
                     *      returns no results</a>
                     */
                    askSubqueries.add(subquery);
                    continue;
                }

                ordered.add(child);

            }

            /*
             * Do the non-optional sub-groups (Join groups and UNION).
             */
            for (IGroupMemberNode child : joinGroup) {

                if (!(child instanceof GraphPatternGroup<?>)) {
                    continue;
                }

                final GraphPatternGroup<?> subgroup = (GraphPatternGroup<?>) child;

                if (subgroup.isOptional()) {
                    // Optional subgroups are scheduled after the required joins.
                    continue;
                }

                if (subgroup instanceof PropertyPathUnionNode) {
                    // Already added above with the statement patterns.
                    continue;
                }

                ordered.add(subgroup);

            }

            /*
             * Run services which have constant URIs next.
             * 
             * TODO These could be ordered by the #of unbound variables
             * or some such. Simple triple patterns for which we can use
             * ESTCARD could be ordered more precisely.
             */
            {

                final Iterator<ServiceNode> sitr = serviceNodes.iterator();

                while (sitr.hasNext()) {

                    final ServiceNode n = sitr.next();

                    if (!n.getServiceRef().isConstant()) {
                        continue;
                    }

                    sitr.remove();

                    ordered.add(n);

                }

            }

            /*
             * Run remaining service calls (those with a variable service ref;
             * everything else was removed from the list above).
             */
            for (ServiceNode n : serviceNodes) {

                ordered.add(n);

            }

        } // end of required joins.

        /**
         * Run the ASK subqueries (FILTER EXISTS, FILTER NOT EXISTS).
         * 
         * TODO This should be (I think) a permissible placement for the ASK
         * subqueries. However, we might still run into problems if FILTER (NOT)
         * EXISTS is run for a variable which is only bound by an OPTIONAL.
         * 
         * TODO There could also be a problem with the ordering of MINUS. Both
         * FILTER (NOT) EXISTS and MINUS need further inspection of the
         * constraints on when they may be evaluated, both in terms of
         * efficiency and correctness. I believe that the correct constraint for
         * FILTER (NOT) EXISTS is simply that for FILTER attachment: That is
         * (a) for variables bound by required joins, no sooner than their
         * filter variables are known to be bound; and (b) for variables only
         * bound by OPTIONALS, not until after the last point at which they
         * MIGHT be bound.
         * 
         * Note: While the change for ticket 515 fixes that query, it is
         * possible that we still could get bad join orderings when the
         * variables used by the filter are only bound by OPTIONAL joins. It is
         * also possible that we could run the ASK subquery for FILTER (NOT)
         * EXISTS earlier if the filter variables are bound by required joins.
         * This is really identical to the join filter attachment problem. The
         * problem in the AST is that both the ASK subquery and the FILTER are
         * present. It seems that the best solution would be to attach the ASK
         * subquery to the FILTER and then to run it immediately before the
         * FILTER, letting the existing filter attachment logic decide where to
         * place the filter. We would also have to make sure that the FILTER was
         * never attached to a JOIN since the ASK subquery would have to be run
         * before the FILTER was evaluated.
         * 
         * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/515">
         *      Query with two "FILTER NOT EXISTS" expressions returns no
         *      results</a>
         */
        for (SubqueryRoot askSubquery : askSubqueries) {

            ordered.add(askSubquery);

        }

        /*
         * Next do the optionals: optional statement patterns (simple
         * optionals) and optional sub-groups, in their original relative
         * order.
         */
        for (IGroupMemberNode child : joinGroup) {

            if (child instanceof StatementPatternNode) {

                final StatementPatternNode sp = (StatementPatternNode) child;

                if (sp.isOptional()) {

                    /*
                     * ASTSimpleOptionalOptimizer will recognize and lift out
                     * simple optionals into the parent join group. A simple
                     * optional is basically a single statement pattern in an
                     * optional join group. If there were any FILTERs in the
                     * simple optional join group, then they were lifted out as
                     * well and attached to this StatementPatternNode. Such
                     * FILTER(s) MUST NOT have materialization requirements for
                     * variables which were not already bound before the
                     * optional JOIN on this statement pattern.
                     */

                    ordered.add(sp);

                }

            }

            if (!(child instanceof GraphPatternGroup<?>)) {
                continue;
            }

            final GraphPatternGroup<?> subgroup = (GraphPatternGroup<?>) child;

            if (!subgroup.isOptional()) {
                // Non-optional subgroups were added with the required joins.
                continue;
            }

            if (subgroup instanceof PropertyPathUnionNode) {
                // Already added above with the statement patterns.
                continue;
            }

            ordered.add(subgroup);

        }

        /*
         * Add the LET assignments to the pipeline (the assignments for a
         * constant were already removed from this list above).
         */
        for (AssignmentNode n : assignments) {

            ordered.add(n);

        }

        /*
         * Add the post-conditionals to the pipeline.
         */
        for (IGroupMemberNode n : sa.getPostFilters(joinGroup)) {

            ordered.add(n);

        }

        final int arity = joinGroup.arity();

        if (ordered.size() != arity) {

            throw new AssertionError("should not be pruning any children");

        }

        /*
         * Replace the children with those in the [ordered] list.
         * 
         * Note: [ordered] is a linked list, so we iterate over it rather than
         * using get(i), which would walk the list for each index.
         */
        int index = 0;

        for (IGroupMemberNode n : ordered) {

            joinGroup.setArg(index++, (BOp) n);

        }

    }

}