/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.plan.planner;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import org.diqube.diql.request.FunctionRequest;
import org.diqube.execution.ColumnVersionManager;
import org.diqube.execution.ExecutablePlanFactory;
import org.diqube.execution.ExecutablePlanStep;
import org.diqube.execution.consumers.ColumnBuiltConsumer;
import org.diqube.execution.consumers.ColumnValueConsumer;
import org.diqube.execution.consumers.ColumnVersionBuiltConsumer;
import org.diqube.execution.consumers.GroupIntermediaryAggregationConsumer;
import org.diqube.execution.steps.BuildColumnFromValuesStep;
import org.diqube.execution.steps.GroupFinalAggregationStep;
import org.diqube.execution.steps.ProjectStep;
import org.diqube.executionenv.ExecutionEnvironment;
import org.diqube.plan.PlannerColumnInfo;
import org.diqube.util.ColumnOrValue;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
/**
 * {@link ColumnManager} for the Query Master node.
 *
 * <p>
 * Collects the {@link ExecutablePlanStep}s that produce columns on the query master and wires them together. The
 * expected call order is: register columns ({@link #produceColumn(FunctionRequest)},
 * {@link #ensureColumnAvailable(String)}, {@link #wireOutputOfColumnIfAvailable(String, ExecutablePlanStep)},
 * {@link #wireGroupInput(ExecutablePlanStep)}), then {@link #prepareBuild()}, then
 * {@link #provideColumnValuesProvidingStep(ExecutablePlanStep)} and finally {@link #build()}.
 *
 * @author Bastian Gloeckle
 */
public class MasterColumnManager implements ColumnManager<ExecutablePlanStep> {
  /** Steps that produce a column on the query master, keyed by the name of the produced column. */
  private final Map<String, List<ExecutablePlanStep>> functionMasterSteps = new HashMap<>();

  private final ExecutionEnvironment env;

  private final Supplier<Integer> nextMasterStepIdSupplier;

  private final ExecutablePlanFactory executablePlanFactory;

  private final Map<String, PlannerColumnInfo> columnInfo;

  /** Names of the columns that were requested through {@link #ensureColumnAvailable(String)}. */
  private final Set<String> columnsThatNeedToBeAvailable = new HashSet<>();

  /**
   * Target steps that asked for the output of a column for which no producing step was registered yet, keyed by
   * the column name. These wires are executed in {@link #build()}.
   */
  private final Map<String, List<ExecutablePlanStep>> delayedWires = new HashMap<>();

  private final RemoteResolveManager remoteResolveManager;

  private final MasterWireManager masterWireManager;

  /** Set through {@link #provideColumnValuesProvidingStep(ExecutablePlanStep)}; required by {@link #build()}. */
  private ExecutablePlanStep columnValuesProvidingStep;

  private final ColumnVersionManager columnVersionManager;

  /**
   * @param masterExecutuionEnvironment
   *          The {@link ExecutionEnvironment} that is passed to all steps created by this manager.
   * @param nextMasterStepIdSupplier
   *          Supplies the ID for each step created by this manager.
   * @param executablePlanFactory
   *          Factory used to create the steps.
   * @param columnVersionManager
   *          Passed on to the steps created by this manager.
   * @param columnInfo
   *          Information on columns, keyed by column name. Used to find the type of a column and the columns it
   *          depends on.
   * @param remoteResolveManager
   *          This {@link RemoteResolveManager} will be fed with those columns that need to be available on the
   *          master and therefore need to be resolved on the remotes. Please note the JavaDoc of
   *          {@link #prepareBuild()}.
   * @param masterWireManager
   *          Used to wire the steps with each other.
   */
  public MasterColumnManager(ExecutionEnvironment masterExecutuionEnvironment,
      Supplier<Integer> nextMasterStepIdSupplier, ExecutablePlanFactory executablePlanFactory,
      ColumnVersionManager columnVersionManager, Map<String, PlannerColumnInfo> columnInfo,
      RemoteResolveManager remoteResolveManager, MasterWireManager masterWireManager) {
    this.env = masterExecutuionEnvironment;
    this.nextMasterStepIdSupplier = nextMasterStepIdSupplier;
    this.executablePlanFactory = executablePlanFactory;
    this.columnInfo = columnInfo;
    this.remoteResolveManager = remoteResolveManager;
    this.masterWireManager = masterWireManager;
    this.columnVersionManager = columnVersionManager;
  }

  /**
   * Registers the step that produces the output column of the given function on the query master.
   *
   * <p>
   * A step is created for {@link FunctionRequest.Type#PROJECTION} and {@link FunctionRequest.Type#AGGREGATION_ROW}
   * requests only; requests of any other type are ignored.
   *
   * @param fnReq
   *          The function whose output column should be produced.
   */
  @Override
  public void produceColumn(FunctionRequest fnReq) {
    if (fnReq.getType().equals(FunctionRequest.Type.PROJECTION)) {
      ProjectStep projectStep = executablePlanFactory.createProjectStep(nextMasterStepIdSupplier.get(), env,
          fnReq.getFunctionName(), fnReq.getOutputColumn(),
          fnReq.getInputParameters().toArray(new ColumnOrValue[fnReq.getInputParameters().size()]),
          columnVersionManager);

      // ensure all input columns are fully available on master
      for (ColumnOrValue input : fnReq.getInputParameters()) {
        if (input.getType().equals(ColumnOrValue.Type.COLUMN))
          ensureColumnAvailable(input.getColumnName());
      }

      functionMasterSteps.put(fnReq.getOutputColumn(), newStepList(projectStep));
    } else if (fnReq.getType().equals(FunctionRequest.Type.AGGREGATION_ROW)) {
      // only the literal parameters are handed to the final aggregation step
      List<Object> constantFunctionParams = new ArrayList<>();
      for (ColumnOrValue param : fnReq.getInputParameters()) {
        if (param.getType().equals(ColumnOrValue.Type.LITERAL))
          constantFunctionParams.add(param.getValue());
      }

      // TODO #28 do NOT calculate all Grouped results on query master, but distribute groups according to group hash
      // along all cluster nodes. That means that those clusternodes would fully process specific sets of groups and
      // they would need to have all the data needed for calculating those groups transferred to them. This though
      // would decrease the load on the query master heavily, especially if the results are ordered by grouped
      // columns early.
      GroupFinalAggregationStep finalStep =
          executablePlanFactory.createGroupFinalAggregationStep(nextMasterStepIdSupplier.get(), env,
              fnReq.getFunctionName(), fnReq.getOutputColumn(), columnVersionManager, constantFunctionParams);

      functionMasterSteps.put(fnReq.getOutputColumn(), newStepList(finalStep));
    }
    // note that the query master does not execute anything for AGGREGATION_COL!
  }

  /**
   * Marks the given column as one that needs to be available on the query master.
   *
   * @param colName
   *          Name of the column.
   */
  @Override
  public void ensureColumnAvailable(String colName) {
    columnsThatNeedToBeAvailable.add(colName);
  }

  /**
   * Wires the output of the step producing the given column to the given target step.
   *
   * <p>
   * If a producing step is registered already, the wiring happens immediately. If there is no producing step yet,
   * but the column was requested through {@link #ensureColumnAvailable(String)}, the wire is remembered and
   * executed in {@link #build()}. In all other cases nothing happens.
   *
   * @param colName
   *          Name of the column whose output should be wired.
   * @param targetStep
   *          The step that should receive the output.
   */
  @Override
  public void wireOutputOfColumnIfAvailable(String colName, ExecutablePlanStep targetStep) {
    List<ExecutablePlanStep> producingSteps = functionMasterSteps.get(colName);
    if (producingSteps != null) {
      wireProducedColumn(producingSteps, targetStep);
    } else if (columnsThatNeedToBeAvailable.contains(colName)) {
      delayedWires.computeIfAbsent(colName, key -> new ArrayList<>()).add(targetStep);
    }
  }

  /**
   * Accepts a step that provides a {@link GroupIntermediaryAggregationConsumer} - these intermediate results are
   * provided by cluster nodes and need to be finalized on the query master.
   */
  @Override
  public void wireGroupInput(ExecutablePlanStep groupIntermediateAggregateSourceStep) {
    // wire all GroupFinalAggregationSteps registered so far to the source of the intermediary aggregation results.
    functionMasterSteps.values().stream().flatMap(steps -> steps.stream())
        .filter(step -> step instanceof GroupFinalAggregationStep)
        .forEach(groupFinalAggStep -> masterWireManager.wire(GroupIntermediaryAggregationConsumer.class,
            groupIntermediateAggregateSourceStep, groupFinalAggStep));
  }

  /**
   * Prepares the call to {@link #build()}. Execute before building the steps of the {@link RemoteResolveManager} that
   * was specified in the constructor: This method will add any needed resolve steps in that
   * {@link RemoteResolveManager} if the query master needs additional columns to be resolved!
   */
  @Override
  public void prepareBuild() {
    // ensure the source columns of the columns that are calculated on the query master are available
    // Row Aggregation columns do not need the whole columns on query master, as they will receive the groupIntermediary
    // results from the cluster nodes
    for (Entry<String, List<ExecutablePlanStep>> masterEntry : functionMasterSteps.entrySet()) {
      PlannerColumnInfo colInfo = columnInfo.get(masterEntry.getKey());
      if (colInfo != null && !colInfo.getType().equals(FunctionRequest.Type.AGGREGATION_ROW)) {
        for (String prevColumnName : colInfo.getDependsOnColumns())
          ensureColumnAvailable(prevColumnName);
      }
    }

    // take care of the columns we need to ensure are available on the query master, but for which no producing step
    // is registered: they are built from the values the remotes provide.
    for (String transferColName : Sets.difference(columnsThatNeedToBeAvailable, functionMasterSteps.keySet())) {
      ExecutablePlanStep masterCreationStep = executablePlanFactory
          .createBuildColumnFromValuesStep(nextMasterStepIdSupplier.get(), env, transferColName, columnVersionManager);

      functionMasterSteps.put(transferColName, newStepList(masterCreationStep));

      // ensure the remote provides values for that column
      remoteResolveManager.resolveValuesOfColumn(transferColName);
    }
  }

  /**
   * @param columnValuesProvidingStep
   *          The {@link ExecutablePlanStep} that provides all values of all columns that the cluster nodes have
   *          resolved. This is needed for those columns that need to be created on the query master (=
   *          {@link #ensureColumnAvailable(String)} was called).
   */
  public void provideColumnValuesProvidingStep(ExecutablePlanStep columnValuesProvidingStep) {
    this.columnValuesProvidingStep = columnValuesProvidingStep;
  }

  /**
   * Wires all steps managed by this instance and returns them.
   *
   * <p>
   * Call {@link #provideColumnValuesProvidingStep(ExecutablePlanStep)} before this!
   *
   * @return All steps that were registered in this manager.
   * @throws IllegalStateException
   *           If no column values providing step was provided or if a column whose output was requested is not
   *           produced by any step.
   */
  @Override
  public List<ExecutablePlanStep> build() {
    if (columnValuesProvidingStep == null)
      throw new IllegalStateException("Column values step was not provided.");

    // wire the columns that have been created; the source columns are available on the query master already, see
    // #prepareBuild.
    // Aggregation columns do not need to wait for the built columns of their parameters on query master, as they
    // will receive the groupIntermediary results from the cluster nodes and will not work on the column values
    // directly.
    for (Entry<String, List<ExecutablePlanStep>> masterEntry : functionMasterSteps.entrySet()) {
      PlannerColumnInfo colInfo = columnInfo.get(masterEntry.getKey());
      if (colInfo != null && !colInfo.getType().equals(FunctionRequest.Type.AGGREGATION_ROW)) {
        ExecutablePlanStep inputStep = Iterables.getFirst(masterEntry.getValue(), null);
        for (String prevColumnName : colInfo.getDependsOnColumns())
          wireOutputOfColumnIfAvailable(prevColumnName, inputStep);
      }
    }

    // wire the BuildColumnFromValues steps inputs to the step that is providing the column values.
    functionMasterSteps.values().stream().flatMap(steps -> steps.stream())
        .filter(step -> step instanceof BuildColumnFromValuesStep)
        .forEach(buildColumnFromValuesStep -> masterWireManager.wire(ColumnValueConsumer.class,
            columnValuesProvidingStep, buildColumnFromValuesStep));

    // execute delayedWires, as now all columns should be represented in functionMasterSteps.
    // The wiring is done directly instead of going through #wireOutputOfColumnIfAvailable: for a column that is
    // still not produced, that method would append to the list that is being iterated here and fail with a
    // ConcurrentModificationException. Fail with a descriptive message instead.
    for (Entry<String, List<ExecutablePlanStep>> delayedEntry : delayedWires.entrySet()) {
      List<ExecutablePlanStep> producingSteps = functionMasterSteps.get(delayedEntry.getKey());
      if (producingSteps == null)
        throw new IllegalStateException("Column '" + delayedEntry.getKey()
            + "' is needed on the query master, but no step produces it. Was prepareBuild() called after the "
            + "column was requested?");

      for (ExecutablePlanStep targetStep : delayedEntry.getValue())
        wireProducedColumn(producingSteps, targetStep);
    }

    return functionMasterSteps.values().stream().flatMap(steps -> steps.stream()).collect(Collectors.toList());
  }

  /**
   * @return true if a step producing the given column is registered in this manager.
   */
  @Override
  public boolean isColumnProduced(String colName) {
    return functionMasterSteps.containsKey(colName);
  }

  /**
   * Wires the last of the given producing steps to the target step, for both {@link ColumnVersionBuiltConsumer} and
   * {@link ColumnBuiltConsumer}.
   */
  private void wireProducedColumn(List<ExecutablePlanStep> producingSteps, ExecutablePlanStep targetStep) {
    ExecutablePlanStep previousStep = Iterables.getLast(producingSteps);
    masterWireManager.wire(ColumnVersionBuiltConsumer.class, previousStep, targetStep);
    masterWireManager.wire(ColumnBuiltConsumer.class, previousStep, targetStep);
  }

  /** Creates a new mutable list that contains only the given step. */
  private static List<ExecutablePlanStep> newStepList(ExecutablePlanStep step) {
    List<ExecutablePlanStep> steps = new ArrayList<>();
    steps.add(step);
    return steps;
  }
}