/**
 * diqube: Distributed Query Base.
 *
 * Copyright (C) 2015 Bastian Gloeckle
 *
 * This file is part of diqube.
 *
 * diqube is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.diqube.plan.planner;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import org.diqube.diql.request.ExecutionRequest;
import org.diqube.diql.request.FunctionRequest;
import org.diqube.diql.request.FunctionRequest.Type;
import org.diqube.diql.request.ResolveValueRequest;
import org.diqube.execution.ColumnVersionManager;
import org.diqube.execution.ColumnVersionManagerFactory;
import org.diqube.execution.ExecutablePlan;
import org.diqube.execution.ExecutablePlanFactory;
import org.diqube.execution.ExecutablePlanInfo;
import org.diqube.execution.ExecutablePlanStep;
import org.diqube.execution.consumers.ColumnValueConsumer;
import org.diqube.execution.consumers.GroupIntermediaryAggregationConsumer;
import org.diqube.execution.consumers.OverwritingRowIdConsumer;
import org.diqube.execution.consumers.RowIdConsumer;
import org.diqube.execution.consumers.TableFlattenedConsumer;
import org.diqube.execution.steps.ExecuteRemotePlanOnShardsStep;
import org.diqube.execution.steps.FlattenStep;
import org.diqube.execution.steps.HavingResultStep;
import org.diqube.executionenv.ExecutionEnvironment;
import org.diqube.name.FlattenedTableNameUtil;
import org.diqube.plan.PlannerColumnInfo;
import org.diqube.plan.PlannerColumnInfoBuilder;
import org.diqube.plan.RemoteExecutionPlanFactory;
import org.diqube.plan.exception.PlanBuildException;
import org.diqube.remote.cluster.thrift.RExecutionPlan;
import org.diqube.remote.cluster.thrift.RExecutionPlanStep;
import org.diqube.remote.cluster.thrift.RExecutionPlanStepDataType;
import org.diqube.util.Pair;
import org.diqube.util.TopologicalSort;

/**
 * Uses a {@link ExecutionRequest} to build an {@link ExecutablePlan} that is executable on the query master node. That
 * {@link ExecutablePlan} though will contain a {@link ExecuteRemotePlanOnShardsStep} which in turn will execute parts
 * of the overall-plan on the other cluster nodes.
 *
 * @author Bastian Gloeckle
 */
public class ExecutionPlanner {
  /** Creates the {@link ExecutablePlanStep}s executed on the query master. */
  private ExecutablePlanFactory executablePlanFactory;
  /** Creates the {@link RExecutionPlanStep}s that are serialized and shipped to the remote cluster nodes. */
  private RemoteExecutionPlanFactory remoteExecutionPlanFactory;
  /** Factory for the {@link ColumnVersionManager} used by master-side column-building steps. */
  private ColumnVersionManagerFactory columnVersionManagerFactory;
  // NOTE(review): this field is stored but not referenced anywhere in this class as visible here — confirm whether
  // it is still needed or is a leftover dependency.
  private FlattenedTableNameUtil flattenedTableNameGenerator;

  // Step-id counters. IDs are only required to be unique within one plan; both remote and master step ids are
  // re-assigned after the topological sorts at the end of plan(...) anyway.
  private int nextMasterStepId = 0;
  private int nextRemoteStepId = 0;

  public ExecutionPlanner(ExecutablePlanFactory executablePlanFactory,
      RemoteExecutionPlanFactory remoteExecutionPlanFactory, ColumnVersionManagerFactory columnVersionManagerFactory,
      FlattenedTableNameUtil flattenedTableNameGenerator) {
    this.executablePlanFactory = executablePlanFactory;
    this.remoteExecutionPlanFactory = remoteExecutionPlanFactory;
    this.columnVersionManagerFactory = columnVersionManagerFactory;
    this.flattenedTableNameGenerator = flattenedTableNameGenerator;
  }

  /**
   * Executes planning.
   *
   * @param executionRequest
   *          The input {@link ExecutionRequest} that was parsed from an diql stmt.
   * @param columnInfo
   *          The columnInfo for all {@link FunctionRequest} columns in the {@link ExecutionRequest}, see
   *          {@link PlannerColumnInfoBuilder}.
   * @param masterDefaultExecutionEnv
   *          The {@link ExecutionEnvironment} to be used for those {@link ExecutablePlanStep}s that will be run on the
   *          query master directly.
   * @return An {@link ExecutablePlan} that can be executed by the Query Master right away and which will distribute
   *         some workload to the other cluster node.
   */
  public ExecutablePlan plan(ExecutionRequest executionRequest, Map<String, PlannerColumnInfo> columnInfo,
      ExecutionEnvironment masterDefaultExecutionEnv) throws PlanBuildException {
    // ==== Initialize all helper objects
    // TODO #19 support selecting constants

    // Column names whose values the user asked to have resolved (SELECTed columns).
    Set<String> resultColNamesRequested = executionRequest.getResolveValues().stream()
        .map(resolveReq -> resolveReq.getResolve().getColumnName()).collect(Collectors.toSet());

    MasterWireManager masterWireManager = new MasterWireManager();
    RemoteWireManager remoteWireManager = new RemoteWireManager();

    List<RExecutionPlanStep> allRemoteSteps = new ArrayList<>();
    List<ExecutablePlanStep> allMasterSteps = new ArrayList<>();

    Supplier<Integer> nextMasterIdSupplier = () -> nextMasterStepId++;
    Supplier<Integer> nextRemoteIdSupplier = () -> nextRemoteStepId++;

    RemoteColumnManager remoteColManager =
        new RemoteColumnManager(nextRemoteIdSupplier, remoteExecutionPlanFactory, columnInfo, remoteWireManager);
    RemoteResolveManager remoteResolveManager =
        new RemoteResolveManager(nextRemoteIdSupplier, remoteColManager, remoteExecutionPlanFactory, remoteWireManager);

    ColumnVersionManager masterColumnVersionManager =
        columnVersionManagerFactory.createColumnVersionManager(masterDefaultExecutionEnv);
    MasterColumnManager masterColManager =
        new MasterColumnManager(masterDefaultExecutionEnv, nextMasterIdSupplier, executablePlanFactory,
            masterColumnVersionManager, columnInfo, remoteResolveManager, masterWireManager);
    MasterResolveManager masterResolveManager = new MasterResolveManager(nextMasterIdSupplier,
        masterDefaultExecutionEnv, executablePlanFactory, masterColManager, masterWireManager, resultColNamesRequested);

    // ==== Take care of columns that need to be created (e.g. by projection and aggregation) and feed this info into
    // column managers
    Set<String> columnNamesWorkedOn = new HashSet<>();
    for (FunctionRequest fnReq : executionRequest.getProjectAndAggregate()) {
      if (columnNamesWorkedOn.contains(fnReq.getOutputColumn()))
        // remember: we want to calculate the same function with the same arguments only once. See class comment of
        // FunctionRequest.
        continue;
      columnNamesWorkedOn.add(fnReq.getOutputColumn());

      if (columnInfo.get(fnReq.getOutputColumn()).isTransitivelyDependsOnRowAggregation()) {
        // the resulting col depends on a row that will be created by a row aggregation (GROUP BY). As the final values
        // of the group by is available only on the query master, we need to produce that new column only there, too.
        // Note that the validator made sure that this new Fn is not again a row Agg function or a col Agg function.
        masterColManager.produceColumn(fnReq);
      } else {
        if (fnReq.getType().equals(Type.AGGREGATION_ROW)) {
          // A row aggregation (GROUP BY aggregation). Both, remote and master need to do something: // Intermediate
          // part of aggregation will be executed on cluster nodes, final part will be executed on query master.
          remoteColManager.produceColumn(fnReq);
          masterColManager.produceColumn(fnReq);
        } else {
          // simple (REPEATED_)PROJECTION that is not based on a row Agg, or a column aggregation - all of these can be
          // executed on the remotes.
          remoteColManager.produceColumn(fnReq);
        }
      }
    }

    // The RowIdSink step will consume the results (=rowIDs) of the WHERE stmts if there are any. This step outputs the
    // rowIDs that other steps later on can rely on. If no input rowIds are provided, this will simply return /all/
    // rowIds (= no WHERE stmt).
    RExecutionPlanStep remoteRowSinkStep = remoteExecutionPlanFactory.createRowIdSink(nextRemoteIdSupplier.get());
    allRemoteSteps.add(remoteRowSinkStep);

    // ==== Create remote steps for WHERE clause
    if (executionRequest.getWhere() != null) {
      WhereBuilder whereHandler =
          new WhereBuilder(remoteExecutionPlanFactory, nextRemoteIdSupplier, remoteColManager, remoteWireManager);
      Pair<RExecutionPlanStep, List<RExecutionPlanStep>> whereResult =
          whereHandler.build(executionRequest.getWhere());
      // let the WHERE steps provide the Row IDs to the RowIdSink.
      remoteWireManager.wire(RowIdConsumer.class, whereResult.getLeft(), remoteRowSinkStep);
      allRemoteSteps.addAll(whereResult.getRight());
    }

    // Row Sink provides rowIDs to work on. This reference is re-pointed below whenever a later step (GROUP, ORDER)
    // narrows down the set of row IDs.
    RExecutionPlanStep remoteRowIdSourceStep = remoteRowSinkStep;

    // ==== Create remote steps for a potential GROUP clause (master steps will be created below).
    if (executionRequest.getGroup() != null) {
      RExecutionPlanStep groupStep =
          remoteExecutionPlanFactory.createGroup(executionRequest.getGroup(), nextRemoteIdSupplier.get());
      for (String groupByCol : executionRequest.getGroup().getGroupColumns()) {
        // (1) make sure the columnBuiltConsumer is wired to the group step so the latter does not start too early
        remoteColManager.wireOutputOfColumnIfAvailable(groupByCol, groupStep);
        // (2) make sure that the values of the group by columns are sent to the master - these are needed for the
        // group ID adjustment step! We do not need to build a column for that (the GroupIdAdjust step just consumes the
        // values), but we definitely need to send the values!
        remoteResolveManager.resolveValuesOfColumn(groupByCol);
      }

      // group step consumes rowIDs provided by previous RowID consumer and provides the RowIDs for every future step
      // (because it will drastically reduce number of rowIds, because it merges multiple rows into groups -> groupIds
      // are rowIds).
      remoteWireManager.wire(RowIdConsumer.class, remoteRowIdSourceStep, groupStep);
      remoteRowIdSourceStep = groupStep;
      allRemoteSteps.add(groupStep);

      // the group step has to provide its data to all Group Intermediary aggregation steps.
      remoteColManager.wireGroupInput(groupStep);

      // TODO #24 we should make sure that results form GroupIntermediateAggregate steps are piped through an order step
      // in order to do a row-id cut-off.
    }

    ExecutablePlanStep masterRowIdSourceStep = null;
    ExecutablePlanStep masterRowIdStartStep = null;

    // ==== Feed information about ORDER into both, remote- and master- OrderHandler.
    if (executionRequest.getOrder() != null) {
      RemoteOrderHandler remoteOrderHandler =
          new RemoteOrderHandler(columnInfo, nextRemoteIdSupplier, remoteExecutionPlanFactory, remoteColManager);
      RExecutionPlanStep remoteOrderStep = remoteOrderHandler.build(executionRequest.getOrder());
      if (remoteOrderStep != null) {
        // order step consumes rowIDs provided by previous RowID consumer and provides the RowIDs for every future step,
        // because it might reduce number of rowIDs (because of a LIMIT clause).
        remoteWireManager.wire(RowIdConsumer.class, remoteRowIdSourceStep, remoteOrderStep);
        remoteRowIdSourceStep = remoteOrderStep;
        allRemoteSteps.add(remoteOrderStep);
      }

      MasterOrderHandler masterOrderHandler =
          new MasterOrderHandler(executablePlanFactory, nextMasterIdSupplier, masterDefaultExecutionEnv,
              masterColManager);
      ExecutablePlanStep masterOrderStep = masterOrderHandler.build(executionRequest.getOrder());
      masterRowIdSourceStep = masterOrderStep;
      masterRowIdStartStep = masterOrderStep;
      allMasterSteps.add(masterOrderStep);
    }

    // ==== Make sure the values of the requested columns are resolved so they can be provided to the user.
    for (ResolveValueRequest resolveValue : executionRequest.getResolveValues()) {
      // TODO #19 support resolving constants
      String colName = resolveValue.getResolve().getColumnName();
      if (masterColManager.isColumnProduced(colName))
        masterResolveManager.resolveValuesOfColumn(colName);
      else
        // If the values are not produced on the master (=aggregations on master or projections thereof), those values
        // are available on the remotes. Make sure that we resolve them there - they will then automatically be sent to
        // the master from the remote and the master will automatically pass them on to a
        // FilterRequestedColumnsAndActiveRowIdsStep (which in turn will hand them over to the caller).
        remoteResolveManager.resolveValuesOfColumn(colName);
    }

    masterColManager.prepareBuild();

    // ==== Start building execution plan for remotes.
    allRemoteSteps.addAll(remoteResolveManager.build(remoteRowIdSourceStep));
    remoteColManager.prepareBuild();
    allRemoteSteps.addAll(remoteColManager.build());

    // do topological sort to make plans easier readable and support faster execution (because threads of first steps
    // will start to run first).
    Map<Integer, RExecutionPlanStep> idToRemoteSteps = new HashMap<>();
    for (RExecutionPlanStep remoteStep : allRemoteSteps)
      idToRemoteSteps.put(remoteStep.getStepId(), remoteStep);
    // maps from old (pre-sort) step id to the new id assigned by the topological sort.
    Map<Integer, Integer> remoteIdChangeMap = new HashMap<>();
    TopologicalSort<RExecutionPlanStep> remoteTopSort = new TopologicalSort<RExecutionPlanStep>( //
        step -> {
          if (step.getProvideDataForSteps() != null) {
            return step.getProvideDataForSteps().keySet().stream().map(idx -> idToRemoteSteps.get(idx))
                .collect(Collectors.toList());
          }
          return new ArrayList<>();
        } , //
        step -> (long) step.getStepId(), //
        (step, newIdx) -> remoteIdChangeMap.put(step.getStepId(), newIdx));
    allRemoteSteps = remoteTopSort.sort(allRemoteSteps);

    // Adjust Ids of steps according to top sort. Both the step ids themselves and the "provide data for" references
    // have to be rewritten consistently.
    for (RExecutionPlanStep remoteStep : allRemoteSteps) {
      if (remoteStep.getProvideDataForSteps() != null) {
        Map<Integer, List<RExecutionPlanStepDataType>> newProvideDataForSteps = new HashMap<>();
        for (Entry<Integer, List<RExecutionPlanStepDataType>> originalEntry : remoteStep.getProvideDataForSteps()
            .entrySet())
          newProvideDataForSteps.put(remoteIdChangeMap.get(originalEntry.getKey()), originalEntry.getValue());
        remoteStep.setProvideDataForSteps(newProvideDataForSteps);
      }
      remoteStep.setStepId(remoteIdChangeMap.get(remoteStep.getStepId()));
    }

    // Build remote execution plan
    RExecutionPlan remoteExecutionPlan =
        remoteExecutionPlanFactory.createExecutionPlan(allRemoteSteps, executionRequest.getFromRequest());

    // ==== Build execution plan for master node.

    // If flattened, be sure to trigger flattening correctly.
    FlattenStep flattenStep = null;
    if (executionRequest.getFromRequest().isFlattened()) {
      flattenStep = executablePlanFactory.createFlattenStep(nextMasterIdSupplier.get(),
          executionRequest.getFromRequest().getTable(), executionRequest.getFromRequest().getFlattenByField());
      allMasterSteps.add(flattenStep);
    }

    // Make query master execute remote execution plan on remotes.
    ExecutablePlanStep executeRemoteStep = executablePlanFactory.createExecuteRemotePlanStep(nextMasterIdSupplier.get(),
        masterDefaultExecutionEnv, remoteExecutionPlan);
    allMasterSteps.add(executeRemoteStep);

    if (flattenStep != null)
      // remote execution must not start before the flattened table is available.
      masterWireManager.wire(TableFlattenedConsumer.class, flattenStep, executeRemoteStep);

    // ==== Handle a GROUP and aggregation functions on master
    boolean rowAggregateFunctionsAvailable = columnInfo.values().stream()
        .anyMatch(colInfo -> colInfo.getType().equals(FunctionRequest.Type.AGGREGATION_ROW));
    if (executionRequest.getGroup() != null && rowAggregateFunctionsAvailable) {
      // TODO #37: Query is probably wrong if it has "group" but no aggregation_row funcs. Inform user.

      // We are grouping and executing aggregation functions, that means that cluster nodes will reply with group
      // intermediary updates to the query master.
      // The groupIds used by the cluster nodes though are the row IDs of one of the rows contained in a group - which
      // means they are only valid locally to a cluster node: The same group (= the same values for the grouped columns)
      // will end up having different groupIds on each cluster node. We add this groupid adjusting step on the query
      // master to merge the groupIds.
      ExecutablePlanStep groupIdAdjustStep = executablePlanFactory.createGroupIdAdjustingStep(
          nextMasterIdSupplier.get(), new HashSet<>(executionRequest.getGroup().getGroupColumns()));
      // wire twice because the adjust step needs both, group Id intermediate information and column values from the
      // executeRemoteStep.
      masterWireManager.wire(ColumnValueConsumer.class, executeRemoteStep, groupIdAdjustStep);
      masterWireManager.wire(GroupIntermediaryAggregationConsumer.class, executeRemoteStep, groupIdAdjustStep);
      allMasterSteps.add(groupIdAdjustStep);

      // After merging the groupIds, that step will provide the updates to any group aggregation finalization functions
      // on the query master.
      masterColManager.wireGroupInput(groupIdAdjustStep);

      if (executionRequest.getHaving() != null) {
        HavingBuilder havingBuilder = new HavingBuilder(executablePlanFactory, nextMasterIdSupplier, masterColManager,
            masterDefaultExecutionEnv, masterWireManager);
        Pair<ExecutablePlanStep, List<ExecutablePlanStep>> p = havingBuilder.build(executionRequest.getHaving());
        HavingResultStep havingResultStep = executablePlanFactory.createHavingResultStep(nextMasterIdSupplier.get());
        masterWireManager.wire(OverwritingRowIdConsumer.class, p.getLeft(), havingResultStep);
        allMasterSteps.addAll(p.getRight());
        allMasterSteps.add(havingResultStep);
      }

      if (masterRowIdStartStep != null) {
        // the groupIdAdjustStep becomes the first step in the master row-id chain, feeding the previous start step.
        masterWireManager.wire(RowIdConsumer.class, groupIdAdjustStep, masterRowIdStartStep);
        masterRowIdStartStep = groupIdAdjustStep;
      } else {
        masterRowIdStartStep = groupIdAdjustStep;
        masterRowIdSourceStep = groupIdAdjustStep;
      }
    } else if (masterRowIdStartStep != null)
      // we have a specific step that wants to consume all row IDs and is != to the group id adjust step (which does not
      // need a RowIdConsumer input), so wire it to the RowIdConsumer output of executeRemoteStep.
      masterWireManager.wire(RowIdConsumer.class, executeRemoteStep, masterRowIdStartStep);

    if (masterRowIdSourceStep == null)
      // we do not have a specific source of RowIDs for all resolving steps etc, just use all the row ids provided by
      // cluster nodes.
      masterRowIdSourceStep = executeRemoteStep;

    masterResolveManager.provideColumnValueSourceStep(executeRemoteStep);
    masterColManager.provideColumnValuesProvidingStep(executeRemoteStep);

    allMasterSteps.addAll(masterResolveManager.build(masterRowIdSourceStep));
    allMasterSteps.addAll(masterColManager.build());

    Map<Integer, Set<Integer>> masterWires = masterWireManager.buildFinalWireMap(allMasterSteps);

    // top sort masters plan, too
    Map<Integer, ExecutablePlanStep> idToMasterSteps = new HashMap<>();
    for (ExecutablePlanStep masterStep : allMasterSteps)
      idToMasterSteps.put(masterStep.getStepId(), masterStep);
    TopologicalSort<ExecutablePlanStep> masterTopSort = new TopologicalSort<ExecutablePlanStep>( //
        step -> {
          if (masterWires.containsKey(step.getStepId()))
            return masterWires.get(step.getStepId()).stream().map(idx -> idToMasterSteps.get(idx))
                .collect(Collectors.toList());
          return new ArrayList<>();
        } , //
        step -> (long) step.getStepId(), //
        (step, newIdx) -> step.setStepId(newIdx));
    allMasterSteps = masterTopSort.sort(allMasterSteps);
    // masterWires is invalid now! (it is keyed by the pre-sort step ids which have just been overwritten)

    // TODO #19 support selecting non-cols
    ExecutablePlanInfo info = createInfo(executionRequest);
    ExecutablePlan plan = executablePlanFactory.createExecutablePlan(masterDefaultExecutionEnv, allMasterSteps, info,
        masterColumnVersionManager);
    return plan;
  }

  /**
   * Builds the {@link ExecutablePlanInfo} summarizing the request: selected column names, the raw request strings of
   * the selections, and whether the statement was ordered/grouped/had a HAVING clause.
   */
  private ExecutablePlanInfo createInfo(ExecutionRequest executionRequest) {
    List<String> selectedCols = executionRequest.getResolveValues().stream()
        .map(res -> res.getResolve().getColumnName()).collect(Collectors.toList());
    List<String> selectionRequests =
        executionRequest.getResolveValues().stream().map(res -> res.getRequestString()).collect(Collectors.toList());

    boolean isOrdered = executionRequest.getOrder() != null;
    boolean isGrouped = executionRequest.getGroup() != null;
    boolean having = executionRequest.getHaving() != null;

    return executablePlanFactory.createExecutablePlanInfo(selectedCols, selectionRequests, isOrdered, isGrouped,
        having);
  }
}