/**
 * diqube: Distributed Query Base.
 *
 * Copyright (C) 2015 Bastian Gloeckle
 *
 * This file is part of diqube.
 *
 * diqube is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.diqube.execution;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.UUID;
import java.util.stream.Collectors;

import org.diqube.data.table.Table;
import org.diqube.data.table.TableShard;
import org.diqube.execution.consumers.ColumnBuiltConsumer;
import org.diqube.execution.consumers.ColumnDictIdConsumer;
import org.diqube.execution.consumers.ColumnValueConsumer;
import org.diqube.execution.consumers.GenericConsumer;
import org.diqube.execution.consumers.GroupConsumer;
import org.diqube.execution.consumers.GroupDeltaConsumer;
import org.diqube.execution.consumers.GroupFinalAggregationConsumer;
import org.diqube.execution.consumers.GroupIntermediaryAggregationConsumer;
import org.diqube.execution.consumers.OrderedRowIdConsumer;
import org.diqube.execution.consumers.RowIdConsumer;
import org.diqube.execution.exception.ExecutablePlanBuildException;
import org.diqube.execution.steps.GroupIntermediaryAggregationStep;
import org.diqube.execution.steps.ResolveValuesStep;
import org.diqube.executionenv.ExecutionEnvironment;
import org.diqube.executionenv.ExecutionEnvironmentFactory;
import org.diqube.executionenv.FlattenedTableInstanceManager;
import org.diqube.executionenv.TableRegistry;
import org.diqube.queries.QueryUuid.QueryUuidThreadState;
import org.diqube.remote.cluster.thrift.RExecutionPlan;
import org.diqube.remote.cluster.thrift.RExecutionPlanStep;
import org.diqube.remote.cluster.thrift.RExecutionPlanStepDataType;
import org.diqube.remote.cluster.thrift.RExecutionPlanStepType;
import org.diqube.thrift.base.util.RUuidUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Builds an {@link ExecutablePlan} out of a {@link RExecutionPlan}.
 *
 * <p>
 * This builder is not that intelligent: It simply instantiates the correct {@link ExecutablePlanStep}s as defined by
 * the {@link RExecutionPlanStep} and uses the wiring defined there.
 *
 * @author Bastian Gloeckle
 */
public class ExecutablePlanFromRemoteBuilder {
  private static final Logger logger = LoggerFactory.getLogger(ExecutablePlanFromRemoteBuilder.class);

  /**
   * Maps the data type named in a remote plan's data flow specification to the consumer class that is used to wire
   * that data flow between two steps.
   */
  private static final Map<RExecutionPlanStepDataType, Class<? extends GenericConsumer>> stepDataTypeToConsumerClass =
      new HashMap<>();

  static {
    // keep in sync with RemoteWireManager
    stepDataTypeToConsumerClass.put(RExecutionPlanStepDataType.COLUMN_BUILT, ColumnBuiltConsumer.class);
    stepDataTypeToConsumerClass.put(RExecutionPlanStepDataType.COLUMN_DICT_ID, ColumnDictIdConsumer.class);
    stepDataTypeToConsumerClass.put(RExecutionPlanStepDataType.COLUMN_VALUE, ColumnValueConsumer.class);
    stepDataTypeToConsumerClass.put(RExecutionPlanStepDataType.GROUP, GroupConsumer.class);
    stepDataTypeToConsumerClass.put(RExecutionPlanStepDataType.GROUP_DELTA, GroupDeltaConsumer.class);
    stepDataTypeToConsumerClass.put(RExecutionPlanStepDataType.GROUP_FINAL_AGG, GroupFinalAggregationConsumer.class);
    stepDataTypeToConsumerClass.put(RExecutionPlanStepDataType.GROUP_INTERMEDIARY_AGG,
        GroupIntermediaryAggregationConsumer.class);
    stepDataTypeToConsumerClass.put(RExecutionPlanStepDataType.ORDERED_ROW_ID, OrderedRowIdConsumer.class);
    stepDataTypeToConsumerClass.put(RExecutionPlanStepDataType.ROW_ID, RowIdConsumer.class);
  }

  private final TableRegistry tableRegistry;

  private final ExecutionEnvironmentFactory executionEnvironmentFactory;

  private final ExecutablePlanStepFromRemoteFactory executablePlanStepFactory;

  private final ExecutablePlanFactory executablePlanFactory;

  private final FlattenedTableInstanceManager flattenedTableManager;

  /** The remote plan to build from; set through {@link #withRemoteExecutionPlan(RExecutionPlan)}. */
  private RExecutionPlan plan;

  /** Optional consumer attached to each {@link ResolveValuesStep}; may be <code>null</code>. */
  private ColumnValueConsumer columnValueConsumer;

  /** Optional consumer attached to each {@link GroupIntermediaryAggregationStep}; may be <code>null</code>. */
  private GroupIntermediaryAggregationConsumer groupIntermediaryAggregationConsumer;

  /* package */ ExecutablePlanFromRemoteBuilder(TableRegistry tableRegistry,
      ExecutionEnvironmentFactory executionEnvironmentFactory,
      ExecutablePlanStepFromRemoteFactory executablePlanStepFactory, ExecutablePlanFactory executablePlanFactory,
      FlattenedTableInstanceManager flattenedTableManager) {
    this.tableRegistry = tableRegistry;
    this.executionEnvironmentFactory = executionEnvironmentFactory;
    this.executablePlanStepFactory = executablePlanStepFactory;
    this.executablePlanFactory = executablePlanFactory;
    this.flattenedTableManager = flattenedTableManager;
  }

  /**
   * Use the given {@link RExecutionPlan} to build from.
   *
   * @return this builder, for call chaining.
   */
  public ExecutablePlanFromRemoteBuilder withRemoteExecutionPlan(RExecutionPlan plan) {
    this.plan = plan;
    return this;
  }

  /**
   * Send all data that was finally loaded to the given {@link ColumnValueConsumer}.
   *
   * @return this builder, for call chaining.
   */
  public ExecutablePlanFromRemoteBuilder withFinalColumnValueConsumer(ColumnValueConsumer columnValueConsumer) {
    this.columnValueConsumer = columnValueConsumer;
    return this;
  }

  /**
   * Send the results of all {@link GroupIntermediaryAggregationStep} to the given
   * {@link GroupIntermediaryAggregationConsumer}.
   *
   * @return this builder, for call chaining.
   */
  public ExecutablePlanFromRemoteBuilder withFinalGroupIntermediateAggregationConsumer(
      GroupIntermediaryAggregationConsumer groupIntermediaryAggregationConsumer) {
    this.groupIntermediaryAggregationConsumer = groupIntermediaryAggregationConsumer;
    return this;
  }

  /**
   * Build the {@link ExecutablePlan}s, for each {@link TableShard} that is available on this node one.
   *
   * <p>
   * This method must be executed with correct {@link QueryUuidThreadState} set, as {@link RemoteExecutionPlanOptimizer}
   * needs correct thread state!
   *
   * @return one {@link ExecutablePlan} per shard of the table the plan selects from.
   * @throws ExecutablePlanBuildException
   *           If no remote plan was set, the table is not available, a data flow target step is missing or a data
   *           flow uses a data type for which no consumer class is registered.
   */
  public List<ExecutablePlan> build() throws ExecutablePlanBuildException {
    if (plan == null) {
      // Fail with a meaningful message instead of a NullPointerException further down.
      throw new ExecutablePlanBuildException("No remote execution plan was provided.");
    }

    Table table = resolveTable();

    List<ExecutablePlan> res = new ArrayList<>(table.getShards().size());
    for (TableShard tableShard : table.getShards()) {
      ExecutionEnvironment defaultEnv = executionEnvironmentFactory.createQueryRemoteExecutionEnvironment(tableShard);

      // note that the following optimization might already put some columns in the Env (from the ColumnShardCache).
      RExecutionPlan optimizedRemotePlan = new RemoteExecutionPlanOptimizer().optimize(defaultEnv, plan);

      // Both maps are keyed by the step ID of the remote step.
      Map<Integer, ExecutablePlanStep> steps = new HashMap<>();
      Map<Integer, RExecutionPlanStep> remoteSteps = new HashMap<>();
      for (RExecutionPlanStep remoteStep : optimizedRemotePlan.getSteps()) {
        ExecutablePlanStep newStep = executablePlanStepFactory.createExecutableStep(defaultEnv, remoteStep);
        steps.put(remoteStep.getStepId(), newStep);
        remoteSteps.put(remoteStep.getStepId(), remoteStep);
      }

      wireDataFlow(steps, remoteSteps);

      ExecutablePlanInfo info = createExecutablePlanInfo(optimizedRemotePlan);
      ExecutablePlan executablePlan = executablePlanFactory.createExecutablePlan(defaultEnv,
          new ArrayList<>(steps.values()), info,
          null /* no col version manager on remote as there are no colversions used here */);
      res.add(executablePlan);
    }

    return res;
  }

  /**
   * Looks up the table named by the FROM specification of {@link #plan}: either a plain table from the
   * {@link TableRegistry} or a flattened table from the {@link FlattenedTableInstanceManager}.
   *
   * @throws ExecutablePlanBuildException
   *           If the lookup returned <code>null</code>.
   */
  private Table resolveTable() throws ExecutablePlanBuildException {
    if (plan.getFromSpec().isSetPlainTableName()) {
      String tableName = plan.getFromSpec().getPlainTableName();
      Table table = tableRegistry.getTable(tableName);
      if (table == null) {
        throw new ExecutablePlanBuildException("Table '" + tableName + "' does not exist.");
      }
      return table;
    }

    String tableName = plan.getFromSpec().getFlattened().getTableName();
    String flattenBy = plan.getFromSpec().getFlattened().getFlattenBy();
    UUID flattenId = RUuidUtil.toUuid(plan.getFromSpec().getFlattened().getFlattenId());
    Table table = flattenedTableManager.getFlattenedTable(flattenId, tableName, flattenBy);
    if (table == null) {
      throw new ExecutablePlanBuildException(
          "Table '" + tableName + "' flattend by '" + flattenBy + "' with id " + flattenId + " is not available.");
    }
    return table;
  }

  /**
   * Wires the data flow between the given steps as specified by the corresponding remote steps and attaches the
   * manually specified final consumers.
   *
   * @param steps
   *          The executable steps, keyed by step ID.
   * @param remoteSteps
   *          The remote steps the executable steps were created from, keyed by the same step IDs.
   * @throws ExecutablePlanBuildException
   *           If a data flow target step does not exist or a data type has no registered consumer class.
   */
  private void wireDataFlow(Map<Integer, ExecutablePlanStep> steps, Map<Integer, RExecutionPlanStep> remoteSteps)
      throws ExecutablePlanBuildException {
    for (Entry<Integer, ExecutablePlanStep> stepEntry : steps.entrySet()) {
      ExecutablePlanStep sourceStep = stepEntry.getValue();
      // Look up by the map key: both maps were filled with the same keys, so this cannot miss.
      RExecutionPlanStep remoteStep = remoteSteps.get(stepEntry.getKey());

      // Use the data flow specifications from the RExecutionPlan.
      if (remoteStep.getProvideDataForStepsSize() > 0) {
        for (Entry<Integer, List<RExecutionPlanStepDataType>> targetEntry : remoteStep.getProvideDataForSteps()
            .entrySet()) {
          ExecutablePlanStep targetStep = steps.get(targetEntry.getKey());
          if (targetStep == null) {
            throw new ExecutablePlanBuildException("Could not find data flow target.");
          }

          for (RExecutionPlanStepDataType type : targetEntry.getValue()) {
            Class<? extends GenericConsumer> consumerClass = stepDataTypeToConsumerClass.get(type);
            if (consumerClass == null) {
              // The mapping is maintained by hand; report a missing entry explicitly instead of wiring null.
              throw new ExecutablePlanBuildException("No consumer class registered for data type " + type + ".");
            }

            logger.trace("Wiring {} from {} to {}", new Object[] { consumerClass, sourceStep, targetStep });
            targetStep.wireOneInputConsumerToOutputOf(consumerClass, sourceStep);
          }
        }
      }

      attachFinalConsumers(sourceStep);
    }
  }

  /**
   * Adds the manually specified final consumers to the given step, if the step is of a matching type and the
   * respective consumer was provided to this builder.
   */
  private void attachFinalConsumers(ExecutablePlanStep step) {
    // add the manually specified ColumnValueConsumer to the ResolveValueStep (which should be exactly one and which
    // should not have an output consumer set yet).
    if (step instanceof ResolveValuesStep && columnValueConsumer != null) {
      step.addOutputConsumer(columnValueConsumer);
    }

    if (step instanceof GroupIntermediaryAggregationStep && groupIntermediaryAggregationConsumer != null) {
      step.addOutputConsumer(groupIntermediaryAggregationConsumer);
    }
  }

  /**
   * Derives the {@link ExecutablePlanInfo} from the steps of the given remote plan: the selected columns are the
   * columns of all {@link RExecutionPlanStepType#RESOLVE_COLUMN_DICT_IDS} steps, and the plan counts as ordered/grouped
   * if it contains an {@link RExecutionPlanStepType#ORDER}/{@link RExecutionPlanStepType#GROUP} step.
   */
  private ExecutablePlanInfo createExecutablePlanInfo(RExecutionPlan plan) {
    List<String> selectedCols = plan.getSteps().stream()
        .filter(s -> s.getType().equals(RExecutionPlanStepType.RESOLVE_COLUMN_DICT_IDS))
        .map(s -> s.getDetailsResolve().getColumn().getColName())
        .collect(Collectors.toCollection(ArrayList::new));

    boolean isOrdered = plan.getSteps().stream().anyMatch(s -> s.getType().equals(RExecutionPlanStepType.ORDER));
    boolean isGrouped = plan.getSteps().stream().anyMatch(s -> s.getType().equals(RExecutionPlanStepType.GROUP));

    return executablePlanFactory.createExecutablePlanInfo(selectedCols,
        null /* query remote does not provide requests */, isOrdered, isGrouped,
        false /* there cannot be a HAVING, because were on a query remote */);
  }
}