/**
 * diqube: Distributed Query Base.
 *
 * Copyright (C) 2015 Bastian Gloeckle
 *
 * This file is part of diqube.
 *
 * diqube is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.diqube.plan.validate;

import java.util.Collection;
import java.util.Map;
import java.util.function.Consumer;

import org.diqube.diql.request.ComparisonRequest.Leaf;
import org.diqube.diql.request.ExecutionRequest;
import org.diqube.diql.request.FunctionRequest;
import org.diqube.diql.request.ResolveValueRequest;
import org.diqube.plan.PlannerColumnInfo;
import org.diqube.plan.exception.ValidationException;
import org.diqube.util.ColumnOrValue.Type;
import org.diqube.util.Pair;

/**
 * Validates an {@link ExecutionRequest}.
 *
 * <p>
 * Each rule lives in its own private method and signals a violation by throwing a {@link ValidationException}; the
 * first violated rule aborts the validation.
 *
 * @author Bastian Gloeckle
 */
public class DefaultExecutionRequestValidator implements ExecutionRequestValidator {

  /**
   * Runs all validation rules on the given request, in a fixed order.
   *
   * @param executionRequest
   *          The request to validate.
   * @param colInfos
   *          Information on columns, keyed by column name. Columns without an entry are treated as not being
   *          generated by a function.
   * @throws ValidationException
   *           If any rule is violated.
   */
  @Override
  public void validate(ExecutionRequest executionRequest, Map<String, PlannerColumnInfo> colInfos)
      throws ValidationException {
    validateFrom(executionRequest);
    validateWhere(executionRequest, colInfos);
    havingNeedsGroupBy(executionRequest);
    validateHaving(executionRequest, colInfos);
    noAggregationOnAggregation(colInfos);
    rowAggregationNeedsGroup(executionRequest, colInfos);
    validateLimit(executionRequest);
    // must run before the array-result check, which iterates over the resolve values without a null check.
    anyResultColumn(executionRequest);
    noArrayResultResolveWhereHavingGroupOrder(executionRequest, colInfos);
    orderByColumnsOnly(executionRequest, colInfos);
    validateGroupBy(executionRequest, colInfos);
    validateRepeatedProjections(executionRequest, colInfos);

    // TODO #23 validate if functions are used correctly (= correct number of params, correct types)
  }

  /**
   * Validates that there is a "from" clause. This is not ensured by the grammar itself, because the grammar is used
   * for parsing partial diql statements which lack a FROM in diqube-ui.
   *
   * <p>
   * Rejected are: a missing FROM request, a missing or empty table name, and a flattened FROM whose flatten-by field
   * is missing or empty.
   *
   * @throws ValidationException
   *           If the FROM clause is missing or incomplete.
   */
  private void validateFrom(ExecutionRequest executionRequest) throws ValidationException {
    if (executionRequest.getFromRequest() == null //
        || executionRequest.getFromRequest().getTable() == null //
        || executionRequest.getFromRequest().getTable().equals("") //
        || (executionRequest.getFromRequest().isFlattened()
            && (executionRequest.getFromRequest().getFlattenByField() == null
                || executionRequest.getFromRequest().getFlattenByField().equals(""))))
      throw new ValidationException("No FROM clause or invalid FLATTEN specified.");
  }

  /**
   * Order By is not allowed when the parameters are effectively only literals.
   *
   * @throws ValidationException
   *           If an ORDER column is known in colInfos and transitively depends on literals only.
   */
  private void orderByColumnsOnly(ExecutionRequest executionRequest, Map<String, PlannerColumnInfo> colInfos)
      throws ValidationException {
    if (executionRequest.getOrder() == null)
      return;

    for (Pair<String, Boolean> orderPair : executionRequest.getOrder().getColumns()) {
      PlannerColumnInfo colInfo = colInfos.get(orderPair.getLeft());
      if (colInfo != null && colInfo.isTransitivelyDependsOnLiteralsOnly())
        throw new ValidationException(
            "ORDER clause with function '" + colInfo.getProvidedByFunctionRequest().getFunctionName()
                + "' depending on literals only, please use columnar values.");
    }
  }

  /**
   * There has to be at least one value that is resolved (= selected) by the request.
   *
   * @throws ValidationException
   *           If the resolve values are <code>null</code> or empty.
   */
  private void anyResultColumn(ExecutionRequest executionRequest) throws ValidationException {
    if (executionRequest.getResolveValues() == null || executionRequest.getResolveValues().size() == 0)
      throw new ValidationException("No result columns speicified.");
  }

  /**
   * Columns that are flagged as "array result" in colInfos ([*] syntax) must not be resolved directly and must not be
   * used in comparisons of WHERE/HAVING, in GROUP BY or in ORDER BY.
   *
   * <p>
   * Expects the resolve values of the request to be non-<code>null</code>, see
   * {@link #anyResultColumn(ExecutionRequest)}.
   *
   * @throws ValidationException
   *           If an array-result column is used in any of the named places.
   */
  private void noArrayResultResolveWhereHavingGroupOrder(ExecutionRequest executionRequest,
      Map<String, PlannerColumnInfo> colInfos) throws ValidationException {
    for (ResolveValueRequest r : executionRequest.getResolveValues()) {
      if (r.getResolve().getType().equals(Type.COLUMN) && isArrayResult(colInfos, r.getResolve().getColumnName()))
        throw new ValidationException("Function '"
            + colInfos.get(r.getResolve().getColumnName()).getProvidedByFunctionRequest().getFunctionName()
            + "' is a function that returns not a single but multiple values ([*] syntax). "
            + "This cannot be SELECTed directly. You might want to aggregate those values "
            + "using a column-aggregation function.");
    }

    if (executionRequest.getWhere() != null)
      noArrayResultInComparison(executionRequest.getWhere().findRecursivelyAllOfType(Leaf.class), "WHERE", colInfos);

    if (executionRequest.getHaving() != null)
      noArrayResultInComparison(executionRequest.getHaving().findRecursivelyAllOfType(Leaf.class), "HAVING",
          colInfos);

    if (executionRequest.getGroup() != null) {
      for (String groupCol : executionRequest.getGroup().getGroupColumns()) {
        if (isArrayResult(colInfos, groupCol))
          // report the function name, just like all other messages do (not the toString of the request object).
          throw new ValidationException(
              "Function '" + colInfos.get(groupCol).getProvidedByFunctionRequest().getFunctionName()
                  + "' is a function that returns not a single but multiple values ([*] syntax). "
                  + "This cannot be used in the GROUP BY clause. You may want to aggregate those values "
                  + "using a column-aggregation function.");
      }
    }

    if (executionRequest.getOrder() != null) {
      for (Pair<String, Boolean> orderPair : executionRequest.getOrder().getColumns()) {
        String orderCol = orderPair.getLeft();
        if (isArrayResult(colInfos, orderCol))
          // report the function name, just like all other messages do (not the toString of the request object).
          throw new ValidationException(
              "Function '" + colInfos.get(orderCol).getProvidedByFunctionRequest().getFunctionName()
                  + "' is a function that returns not a single but multiple values ([*] syntax). "
                  + "This cannot be used in the ORDER BY clause. You may want to aggregate those values "
                  + "using a column-aggregation function.");
      }
    }
  }

  /**
   * Checks both sides of each comparison leaf: neither the left column nor a right-hand column may be an array-result
   * column.
   *
   * @param leafs
   *          The comparison leafs of the clause to inspect.
   * @param clauseName
   *          Name of the clause as it should appear in the error message ("WHERE" or "HAVING").
   * @throws ValidationException
   *           If a leaf compares an array-result column. The message names the function providing the first
   *           offending operand (left before right).
   */
  private void noArrayResultInComparison(Collection<Leaf> leafs, String clauseName,
      Map<String, PlannerColumnInfo> colInfos) throws ValidationException {
    for (Leaf leaf : leafs) {
      FunctionRequest badFr = null;
      if (isArrayResult(colInfos, leaf.getLeftColumnName()))
        badFr = colInfos.get(leaf.getLeftColumnName()).getProvidedByFunctionRequest();

      // Inspect the right side only if the left side was fine. The right side has to be checked on its own, otherwise
      // an array-result column that is used only on the right would slip through.
      if (badFr == null && leaf.getRight().getType().equals(Type.COLUMN)
          && isArrayResult(colInfos, leaf.getRight().getColumnName()))
        badFr = colInfos.get(leaf.getRight().getColumnName()).getProvidedByFunctionRequest();

      if (badFr != null)
        throw new ValidationException("Function '" + badFr.getFunctionName()
            + "' is a function that returns not a single but multiple values ([*] syntax). "
            + "This cannot be used in a comparison in the " + clauseName
            + " clause. You may want to aggregate those values " + "using a column-aggregation function.");
    }
  }

  /**
   * @return <code>true</code> if there is information on the given column in colInfos and that information flags the
   *         column as array result.
   */
  private boolean isArrayResult(Map<String, PlannerColumnInfo> colInfos, String colName) {
    PlannerColumnInfo colInfo = colInfos.get(colName);
    return colInfo != null && colInfo.isArrayResult();
  }

  /**
   * If a LIMIT is given, it needs to be at least 1 and a LIMIT START (if given) needs to be at least 0.
   *
   * <p>
   * Nothing is validated if there is no ORDER or the ORDER has no limit.
   *
   * @throws ValidationException
   *           If the limit or the limit start is out of range.
   */
  private void validateLimit(ExecutionRequest executionRequest) throws ValidationException {
    if (executionRequest.getOrder() == null || executionRequest.getOrder().getLimit() == null)
      return;

    if (executionRequest.getOrder().getLimit() < 1)
      throw new ValidationException("LIMIT needs to be at least 1.");

    if (executionRequest.getOrder().getLimitStart() != null && executionRequest.getOrder().getLimitStart() < 0)
      throw new ValidationException("LIMIT START needs to be at least 0.");
  }

  /**
   * A having clause is only valid if there is a group by clause.
   *
   * @throws ValidationException
   *           If there is a HAVING but no GROUP BY.
   */
  private void havingNeedsGroupBy(ExecutionRequest executionRequest) throws ValidationException {
    if (executionRequest.getHaving() != null && executionRequest.getGroup() == null)
      // should never happen because of ANTLR grammar, but to be sure...
      throw new ValidationException("HAVING clause only supported when there is a GROUP BY.");
  }

  /**
   * When using row aggregation functions, there needs to be a Group By clause.
   *
   * @throws ValidationException
   *           If colInfos contains at least one column of type AGGREGATION_ROW, but the request has no GROUP BY.
   */
  private void rowAggregationNeedsGroup(ExecutionRequest executionRequest, Map<String, PlannerColumnInfo> colInfos)
      throws ValidationException {
    long numberOfAggregationFunctions = colInfos.values().stream()
        .filter(colInfo -> colInfo.getType().equals(FunctionRequest.Type.AGGREGATION_ROW)).count();

    if (numberOfAggregationFunctions > 0 && executionRequest.getGroup() == null)
      throw new ValidationException("There are " + numberOfAggregationFunctions
          + " aggregation functions used, but there is no GROUP BY clause.");
  }

  /**
   * There must be no row aggregation function be applied on an already row aggregated column. The same is true for col
   * aggregated columns. In addition to that it is not valid to have a col aggregation based on a row aggregation (only
   * the other way round!).
   *
   * @throws ValidationException
   *           If one of the invalid aggregation combinations is found.
   */
  private void noAggregationOnAggregation(Map<String, PlannerColumnInfo> colInfos) throws ValidationException {
    for (PlannerColumnInfo colInfo : colInfos.values()) {
      boolean isRowAggregation = colInfo.getType().equals(FunctionRequest.Type.AGGREGATION_ROW);
      boolean isColAggregation = colInfo.getType().equals(FunctionRequest.Type.AGGREGATION_COL);

      if (isRowAggregation && colInfo.isTransitivelyDependsOnRowAggregation())
        throw new ValidationException(
            "Use of row aggregation function '" + colInfo.getProvidedByFunctionRequest().getFunctionName()
                + "' is based on the result of at least one other row aggregation function. This is invalid.");

      if (isColAggregation && colInfo.isTransitivelyDependsOnColAggregation())
        throw new ValidationException(
            "Use of columns aggregation function '" + colInfo.getProvidedByFunctionRequest().getFunctionName()
                + "' is based on the result of at least one other column aggregation function. This is invalid.");

      if (isColAggregation && colInfo.isTransitivelyDependsOnRowAggregation())
        throw new ValidationException(
            "Use of columns aggregation function '" + colInfo.getProvidedByFunctionRequest().getFunctionName()
                + "' is based on the result of at least one row aggregation function. This is invalid.");
    }
  }

  /**
   * Columns compared in the WHERE clause must neither be row aggregations nor depend on one.
   *
   * @throws ValidationException
   *           If a column used in WHERE is a row aggregation or transitively depends on one.
   */
  private void validateWhere(ExecutionRequest executionRequest, Map<String, PlannerColumnInfo> colInfos)
      throws ValidationException {
    if (executionRequest.getWhere() == null)
      return;

    Collection<Leaf> leafs = executionRequest.getWhere().findRecursivelyAllOfType(Leaf.class);

    Consumer<String> validateCol = colName -> {
      if (colInfos.containsKey(colName) // could be that there is no colInfo if it's no generated column.
          && (colInfos.get(colName).isTransitivelyDependsOnRowAggregation()
              || colInfos.get(colName).getType().equals(FunctionRequest.Type.AGGREGATION_ROW)))
        // note: col aggregations are executed on the query remotes, therefore they are fine in WHERE.
        throw new ValidationException(
            "Function '" + colInfos.get(colName).getProvidedByFunctionRequest().getFunctionName()
                + "' is in WHERE clause and either is a row aggregation function or relies on the "
                + "result of a row aggregation function. Aggregation functions can only be used in a HAVING clause.");
    };

    for (Leaf leaf : leafs) {
      validateCol.accept(leaf.getLeftColumnName());
      if (leaf.getRight().getType().equals(Type.COLUMN))
        validateCol.accept(leaf.getRight().getColumnName());
    }
  }

  /**
   * Columns compared in the HAVING clause must be row aggregations or depend on one.
   *
   * @throws ValidationException
   *           If a column used in HAVING has no entry in colInfos, or is neither a row aggregation nor transitively
   *           depending on one.
   */
  private void validateHaving(ExecutionRequest executionRequest, Map<String, PlannerColumnInfo> colInfos)
      throws ValidationException {
    if (executionRequest.getHaving() == null)
      return;

    Collection<Leaf> leafs = executionRequest.getHaving().findRecursivelyAllOfType(Leaf.class);

    Consumer<String> validateCol = colName -> {
      if (!colInfos.containsKey(colName)) {
        // Not a generated column, therefore no function, therefore not aggregated!
        // TODO #112
        throw new ValidationException("Cannot use column '" + colName + "' in HAVING, since it is not aggregated.");
      } else if ((!colInfos.get(colName).isTransitivelyDependsOnRowAggregation()
          && !colInfos.get(colName).getType().equals(FunctionRequest.Type.AGGREGATION_ROW)))
        // note: col aggregations are executed on the query remotes, therefore they need to be in WHERE.
        throw new ValidationException(
            "Function '" + colInfos.get(colName).getProvidedByFunctionRequest().getFunctionName()
                + "' is in HAVING clause but it is not depending on the result of a row aggregation. For performance "
                + "reasons, this restriction has to be used in a WHERE clause.");
    };

    for (Leaf leaf : leafs) {
      validateCol.accept(leaf.getLeftColumnName());
      if (leaf.getRight().getType().equals(Type.COLUMN))
        validateCol.accept(leaf.getRight().getColumnName());
    }
  }

  /**
   * GROUP BY columns must neither be based on row aggregations nor on constants only. Group columns without an entry
   * in colInfos are not inspected.
   *
   * @throws ValidationException
   *           If a group column is (based on) a row aggregation or depends on literals only.
   */
  private void validateGroupBy(ExecutionRequest executionRequest, Map<String, PlannerColumnInfo> colInfos)
      throws ValidationException {
    if (executionRequest.getGroup() == null)
      return;

    for (String groupByCol : executionRequest.getGroup().getGroupColumns()) {
      if (!colInfos.containsKey(groupByCol))
        continue;

      PlannerColumnInfo colInfo = colInfos.get(groupByCol);

      if (colInfo.isTransitivelyDependsOnRowAggregation()
          || colInfo.getType().equals(FunctionRequest.Type.AGGREGATION_ROW))
        // we can aggregate on col aggregation functions, as these are calculated on the query remotes!
        throw new ValidationException("Cannot group on row aggregation functions.");

      if (colInfo.isTransitivelyDependsOnLiteralsOnly())
        throw new ValidationException("Cannot group on projections that are based on constants only.");
    }
  }

  /**
   * Validates the usage of functions that provide not a single but multiple results ([*] syntax).
   *
   * <p>
   * Functions of the request for whose output column there is no entry in colInfos are skipped.
   *
   * @throws ValidationException
   *           If an array-result function is based on a row aggregation, or if a function that is related to an
   *           array-result column is neither of type AGGREGATION_COL nor of type REPEATED_PROJECTION.
   */
  private void validateRepeatedProjections(ExecutionRequest executionRequest,
      Map<String, PlannerColumnInfo> colInfos) throws ValidationException {
    for (FunctionRequest funcReq : executionRequest.getProjectAndAggregate()) {
      PlannerColumnInfo colInfo = colInfos.get(funcReq.getOutputColumn());
      if (colInfo == null)
        // Nothing known about the output column, so there is nothing to validate. Without this guard the
        // non-array-result branch below would fail with a NullPointerException.
        continue;

      if (colInfo.isArrayResult()) {
        if (colInfo.isTransitivelyDependsOnRowAggregation())
          throw new ValidationException("Execution of column projection function '" + funcReq.getFunctionName()
              + "' is based on the calculation of a row-wise aggregation (GROUP BY). "
              + "This is not possible for projections that do a column wise projection ('[*]' syntax).");
      } else {
        // validate that REPEATED_PROJECTIONS are only used as children of other REPEATED_PROJECTION or AGGREGATION_COL
        // steps.
        // NOTE(review): this inspects getColumnsDependingOnThis(), while the variable name and the error message talk
        // about columns this function depends on - confirm that this is the intended direction.
        boolean dependsOnArrayResult = colInfo.getColumnsDependingOnThis().stream()
            .anyMatch(s -> colInfos.containsKey(s) && colInfos.get(s).isArrayResult());

        if (dependsOnArrayResult && !(funcReq.getType().equals(FunctionRequest.Type.AGGREGATION_COL)
            || funcReq.getType().equals(FunctionRequest.Type.REPEATED_PROJECTION)))
          throw new ValidationException("Function '" + funcReq.getFunctionName()
              + "' is based on the result of a function which provides not a single result but "
              + "multiple ([*] syntax). That is not supported here.");
      }
    }
  }
}