/** * diqube: Distributed Query Base. * * Copyright (C) 2015 Bastian Gloeckle * * This file is part of diqube. * * diqube is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.diqube.plan; import java.util.ArrayList; import java.util.Collection; import java.util.Deque; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import org.diqube.diql.request.ExecutionRequest; import org.diqube.diql.request.FunctionRequest; import org.diqube.diql.request.ComparisonRequest.Leaf; import org.diqube.diql.request.FunctionRequest.Type; import org.diqube.util.ColumnOrValue; /** * Builds a map containing {@link PlannerColumnInfo}s, for each created column (by projection or aggregation functions) * one. * * <p> * Please note that the resulting Map will of course contain only /one/ {@link PlannerColumnInfo} per column, although * it might happen that there are multiple {@link FunctionRequest}s that are equal to each other - only the last will be * effectively included in the result map. This is fine, though, because the output column names are unique to the * operation the function executes - that means if there are two {@link FunctionRequest}s available that execute the * same logic, the output column name will be equal - in the end we do not want to execute the logic twice, though, but * only once, so we would need to remove one call anyway. * * @author Bastian Gloeckle */ public class PlannerColumnInfoBuilder { private ExecutionRequest executionRequest; public PlannerColumnInfoBuilder withExecutionRequest(ExecutionRequest executionRequest) { this.executionRequest = executionRequest; return this; } /** * See class comment of {@link PlannerColumnInfoBuilder}. */ public Map<String, PlannerColumnInfo> build() { Map<String, PlannerColumnInfo> res = new HashMap<>(); Set<String> columnNamesUsedInHaving = new HashSet<>(); if (executionRequest.getHaving() != null) { Collection<Leaf> leafs = executionRequest.getHaving().findRecursivelyAllOfType(Leaf.class); for (Leaf leaf : leafs) { columnNamesUsedInHaving.add(leaf.getLeftColumnName()); if (leaf.getRight().getType().equals(ColumnOrValue.Type.COLUMN)) columnNamesUsedInHaving.add(leaf.getRight().getColumnName()); } } List<PlannerColumnInfo> rowAggregationFunctions = new ArrayList<>(); List<PlannerColumnInfo> colAggregationFunctions = new ArrayList<>(); Deque<PlannerColumnInfo> literalOnlyFunctions = new LinkedList<>(); // go through all FunctionRequest. If there are multiple requests with the same output col name, the latter will // overwrite the earlier -> We end up creating each column only once. for (FunctionRequest func : executionRequest.getProjectAndAggregate()) { PlannerColumnInfo info = new PlannerColumnInfo(func.getOutputColumn()); info.setType(func.getType()); info.setProvidedByFunctionRequest(func); info.setUsedInHaving(columnNamesUsedInHaving.contains(func.getOutputColumn())); if (func.getType().equals(Type.AGGREGATION_ROW)) { rowAggregationFunctions.add(info); info.setTransitivelyDependsOnLiteralsOnly(false); } else if (func.getType().equals(Type.AGGREGATION_COL)) { colAggregationFunctions.add(info); info.setTransitivelyDependsOnLiteralsOnly(false); } boolean foundColumn = false; for (ColumnOrValue param : func.getInputParameters()) { if (param.getType().equals(ColumnOrValue.Type.COLUMN)) { foundColumn = true; // add backward dependency info.getDependsOnColumns().add(param.getColumnName()); } } if (func.getType().equals(Type.PROJECTION)) { info.setTransitivelyDependsOnLiteralsOnly(!foundColumn); if (!foundColumn) literalOnlyFunctions.add(info); } if (func.getType().equals(Type.REPEATED_PROJECTION)) { info.setTransitivelyDependsOnLiteralsOnly(false); // cannot be, as there is at least one repeated col as param. info.setArrayResult(true); } else info.setArrayResult(false); res.put(info.getName(), info); } for (PlannerColumnInfo colInfo : res.values()) { // create forward dependencies Iterator<String> otherColIt = colInfo.getDependsOnColumns().iterator(); while (otherColIt.hasNext()) { String otherCol = otherColIt.next(); if (!res.containsKey(otherCol)) // we do not build that column in the ExecutionRequest - expect it to be a column in the TableShard directly. otherColIt.remove(); else res.get(otherCol).getColumnsDependingOnThis().add(colInfo.getName()); } } // resolve transitive row aggregation functions Deque<PlannerColumnInfo> transitiveRowAggregationFunctions = new LinkedList<>(); // find children of 'top level row agg functions' - if the do not (transitively) depend on other agg functions, they // are NOT 'transitive row agg functions'! for (PlannerColumnInfo aggregationFunction : rowAggregationFunctions) for (String dependingColName : aggregationFunction.getColumnsDependingOnThis()) transitiveRowAggregationFunctions.add(res.get(dependingColName)); while (!transitiveRowAggregationFunctions.isEmpty()) { PlannerColumnInfo transitiveAggFunction = transitiveRowAggregationFunctions.poll(); transitiveAggFunction.setTransitivelyDependsOnRowAggregation(true); for (String dependingColName : transitiveAggFunction.getColumnsDependingOnThis()) transitiveRowAggregationFunctions.add(res.get(dependingColName)); } // resolve transitive col aggregation functions Deque<PlannerColumnInfo> transitiveColAggregationFunctions = new LinkedList<>(); // find children of 'top level row agg functions' - if the do not (transitively) depend on other agg functions, they // are NOT 'transitive row agg functions'! for (PlannerColumnInfo aggregationFunction : colAggregationFunctions) for (String dependingColName : aggregationFunction.getColumnsDependingOnThis()) transitiveColAggregationFunctions.add(res.get(dependingColName)); while (!transitiveColAggregationFunctions.isEmpty()) { PlannerColumnInfo transitiveAggFunction = transitiveColAggregationFunctions.poll(); transitiveAggFunction.setTransitivelyDependsOnColAggregation(true); for (String dependingColName : transitiveAggFunction.getColumnsDependingOnThis()) transitiveColAggregationFunctions.add(res.get(dependingColName)); } // resolve transitive literal functions Map<String, Set<String>> functionDependsOn = new HashMap<>(); for (PlannerColumnInfo colInfo : res.values()) functionDependsOn.put(colInfo.getName(), new HashSet<String>(colInfo.getDependsOnColumns())); while (!literalOnlyFunctions.isEmpty()) { PlannerColumnInfo literalFunction = literalOnlyFunctions.poll(); literalFunction.setTransitivelyDependsOnLiteralsOnly(true); for (String dependingColName : literalFunction.getColumnsDependingOnThis()) { functionDependsOn.get(dependingColName).remove(literalFunction.getName()); if (functionDependsOn.get(dependingColName).isEmpty()) literalOnlyFunctions.add(res.get(dependingColName)); } } return res; } }