/* * Copyright (C) 2016 SYSTAP, LLC DBA Blazegraph * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ package com.bigdata.rdf.sparql.ast.optimizers; import java.util.HashSet; import java.util.List; import java.util.Set; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IValueExpression; import com.bigdata.bop.IVariable; import com.bigdata.bop.Var; import com.bigdata.bop.aggregate.AggregateBase; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.sparql.ast.AssignmentNode; import com.bigdata.rdf.sparql.ast.GraphPatternGroup; import com.bigdata.rdf.sparql.ast.IGroupMemberNode; import com.bigdata.rdf.sparql.ast.IQueryNode; import com.bigdata.rdf.sparql.ast.IValueExpressionNode; import com.bigdata.rdf.sparql.ast.NamedSubqueriesNode; import com.bigdata.rdf.sparql.ast.NamedSubqueryRoot; import com.bigdata.rdf.sparql.ast.OrderByExpr; import com.bigdata.rdf.sparql.ast.OrderByNode; import com.bigdata.rdf.sparql.ast.ProjectionNode; import com.bigdata.rdf.sparql.ast.QueryBase; import com.bigdata.rdf.sparql.ast.QueryNodeWithBindingSet; import com.bigdata.rdf.sparql.ast.QueryRoot; import com.bigdata.rdf.sparql.ast.QueryType; import com.bigdata.rdf.sparql.ast.SubqueryBase; import com.bigdata.rdf.sparql.ast.SubqueryRoot; import com.bigdata.rdf.sparql.ast.VarNode; import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext; import com.bigdata.rdf.sparql.ast.service.ServiceNode; /** * This compulsory AST transformation (not an optional optimizer!) enforces the * correct treatment of aggregates in ORDER BY clauses, according to the SPARQL * semantic, under the asumption that {@link com.bigdata.bop.solutions.MemorySortOp} does not have to deal * with aggregates. In a nutshell, this is done by introducing aliases for the * aggregate expressions and thus pushing the computation of the aggregates to * where they can already be processed. * * Simple example. * * Consider this query (sparql11-order-02.rq): * * SELECT ?type WHERE { ?subj a ?type } GROUP BY ?type ORDER BY (count(?subj)) * * It contains aggregate count(?subj) in ORDER BY. The idea is to rewrite it * into this query: * * SELECT ?type (count(?subj) as ?cnt)[excludeFromProjection] WHERE { ?subj a * ?type } GROUP BY ?type ORDER BY ?cnt * * Here ?cnt is an auxiliary alias, i.e, a fresh variable (UUIDs are used as * fresh variable names in the implementation). This query can be computed even * if the sorting does not support aggregates. * * Note that the rewritten query is not completely equivalent to the original * one: it will assign ?cnt, which does not occur in the original query. To * rectify this, some variables in a projection are designated as excluded from * the projection outputs: see the label "[excludeFromProjection]" by ?cnt in * the rewritten query (pseudo-SPARQL). * * * More complex example. * * The original query is: * * PREFIX ex: <http://example.org/> * SELECT ?x ?y WHERE { ?x ex:r ?y . ?y ex:q ?z } GROUP BY ?x ?y ORDER BY * DESC(max(?z)) ?x (count(?z)) DESC(?y) * * * The rewritten query is: * * * PREFIX ex: <http://example.org/> * SELECT ?x ?y (max(?z) AS ?maxz)[excludeFromProjection] (count(?z) AS * ?cntz)[excludeFromProjection] WHERE { ?x ex:r ?y . ?y ex:q ?z } GROUP BY ?x * ?y ORDER BY DESC(?maxz) ?x ?cntz DESC(?y) * * Here ?maxz and ?cntz are the introduced auxiliary aliases for the aggregates. * * @author <a href="mailto:ariazanov@blazegraph.com">Alexandre Riazanov</a> */ public class ASTOrderByAggregateFlatteningOptimizer implements IASTOptimizer { @Override public QueryNodeWithBindingSet optimize( final AST2BOpContext context, final QueryNodeWithBindingSet input) { final IQueryNode queryNode = input.getQueryNode(); final IBindingSet[] bindingSets = input.getBindingSets(); final QueryRoot queryRoot = (QueryRoot) queryNode; // First, process any pre-existing named subqueries. { final NamedSubqueriesNode namedSubqueries = queryRoot .getNamedSubqueries(); if (namedSubqueries != null) { // Note: works around concurrent modification error. final List<NamedSubqueryRoot> list = BOpUtility.toList( namedSubqueries, NamedSubqueryRoot.class); for (NamedSubqueryRoot namedSubquery : list) { // Rewrite the named sub-select doSelectQuery(context, namedSubquery); } } } // rewrite the top-level select doSelectQuery(context, (QueryRoot) queryNode); return new QueryNodeWithBindingSet(queryNode, bindingSets); } // optimize(final AST2BOpContext context,..) private void doSelectQuery(final AST2BOpContext context, final QueryBase queryBase) { // recursion first. doRecursiveRewrite(context, queryBase.getWhereClause()); if (queryBase.getQueryType() != QueryType.SELECT) { return; } final ProjectionNode projection = queryBase.getProjection(); final OrderByNode orderBy = queryBase.getOrderBy(); final OrderByNode newOrderBy = new OrderByNode(); boolean aggregatesPresent = false; if (orderBy == null) { // The transformation is not applicable here. return; } final Set<IVariable<?>> varsToExcludeFromProjection = new HashSet<IVariable<?>>(); for (OrderByExpr orderByExpr : orderBy) { IValueExpression<? extends IV> ve = orderByExpr.getValueExpression(); IValueExpressionNode ven = orderByExpr.getValueExpressionNode(); if (ve instanceof AggregateBase) { aggregatesPresent = true; final Var freshVar = Var.var(); final IValueExpressionNode newVEN = new VarNode(freshVar); final OrderByExpr newOrderByExpr = new OrderByExpr(newVEN, orderByExpr.isAscending()); newOrderBy.addExpr(newOrderByExpr); // E.g., COUNT(?subj) AS ?cnt // or MAX(?obj) AS ?mx final AssignmentNode replacementAlias = new AssignmentNode((VarNode) newVEN,ven); projection.addProjectionExpression(replacementAlias); varsToExcludeFromProjection.add(freshVar); } else { newOrderBy.addExpr(orderByExpr); } } // for (OrderByExpr orderByExpr : orderBy) projection.setVarsToExcludeFromProjection(varsToExcludeFromProjection); if (!aggregatesPresent) { // The transformation is not applicable here. return; } queryBase.setOrderBy(newOrderBy); } // doSelectQuery(final AST2BOpContext context, final QueryBase queryBase) /** * @param context * @param group possibly null, eg, when the enclosing query is a DESCRIBE */ private void doRecursiveRewrite(final AST2BOpContext context, final GraphPatternGroup<IGroupMemberNode> group) { if (group == null) { return; } final int arity = group.arity(); for (int i = 0; i < arity; i++) { final BOp child = (BOp) group.get(i); if (child instanceof GraphPatternGroup<?>) { // Recursion into groups. doRecursiveRewrite(context, ((GraphPatternGroup<IGroupMemberNode>) child)); } else if (child instanceof SubqueryRoot) { // Recursion into subqueries. final SubqueryRoot subqueryRoot = (SubqueryRoot) child; doRecursiveRewrite(context, subqueryRoot.getWhereClause()); // rewrite the sub-select doSelectQuery(context, (SubqueryBase) child); } else if (child instanceof ServiceNode) { // Do not rewrite things inside of a SERVICE node. continue; } } } // doRecursiveRewrite(final AST2BOpContext context,.. } // class ASTDummyOptimizer