/***********************************************************************************************************************
*
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.api.java.operators;
import eu.stratosphere.api.common.operators.AbstractUdfOperator;
import eu.stratosphere.api.common.operators.BinaryOperatorInformation;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.UnaryOperatorInformation;
import eu.stratosphere.api.common.operators.base.BulkIterationBase;
import eu.stratosphere.api.common.operators.base.DeltaIterationBase;
import eu.stratosphere.api.common.operators.base.GenericDataSinkBase;
import eu.stratosphere.api.java.BulkIterationResultSet;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.DeltaIteration;
import eu.stratosphere.api.java.DeltaIterationResultSet;
import eu.stratosphere.api.java.IterativeDataSet;
import eu.stratosphere.api.java.operators.translation.JavaPlan;
import eu.stratosphere.configuration.Configuration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class OperatorTranslation {
/** The already translated operations */
private Map<DataSet<?>, Operator<?>> translated = new HashMap<DataSet<?>, Operator<?>>();
public JavaPlan translateToPlan(List<DataSink<?>> sinks, String jobName) {
List<GenericDataSinkBase<?>> planSinks = new ArrayList<GenericDataSinkBase<?>>();
for (DataSink<?> sink : sinks) {
planSinks.add(translate(sink));
}
JavaPlan p = new JavaPlan(planSinks);
p.setJobName(jobName);
return p;
}
private <T> GenericDataSinkBase<T> translate(DataSink<T> sink) {
// translate the input recursively
Operator<T> input = translate(sink.getDataSet());
// translate the sink itself and connect it to the input
GenericDataSinkBase<T> translatedSink = sink.translateToDataFlow(input);
return translatedSink;
}
private <T> Operator<T> translate(DataSet<T> dataSet) {
// check if we have already translated that data set (operation or source)
Operator<?> previous = (Operator<?>) this.translated.get(dataSet);
if (previous != null) {
@SuppressWarnings("unchecked")
Operator<T> typedPrevious = (Operator<T>) previous;
return typedPrevious;
}
Operator<T> dataFlowOp;
if (dataSet instanceof DataSource) {
dataFlowOp = ((DataSource<T>) dataSet).translateToDataFlow();
}
else if (dataSet instanceof SingleInputOperator) {
dataFlowOp = translateSingleInputOperator((SingleInputOperator<?, ?, ?>) dataSet);
}
else if (dataSet instanceof TwoInputOperator) {
dataFlowOp = translateTwoInputOperator((TwoInputOperator<?, ?, ?, ?>) dataSet);
}
else if (dataSet instanceof BulkIterationResultSet) {
dataFlowOp = translateBulkIteration((BulkIterationResultSet<?>) dataSet);
}
else if (dataSet instanceof DeltaIterationResultSet) {
dataFlowOp = translateDeltaIteration((DeltaIterationResultSet<?, ?>) dataSet);
}
else {
throw new RuntimeException("Error while creating the data flow plan for the program: Unknown operator or data set type: " + dataSet);
}
this.translated.put(dataSet, dataFlowOp);
// take care of broadcast variables
translateBcVariables(dataSet, dataFlowOp);
return dataFlowOp;
}
private <I, O> eu.stratosphere.api.common.operators.SingleInputOperator<?, O, ?> translateSingleInputOperator(SingleInputOperator<?, ?, ?> op) {
@SuppressWarnings("unchecked")
SingleInputOperator<I, O, ?> typedOp = (SingleInputOperator<I, O, ?>) op;
@SuppressWarnings("unchecked")
DataSet<I> typedInput = (DataSet<I>) op.getInput();
Operator<I> input = translate(typedInput);
eu.stratosphere.api.common.operators.SingleInputOperator<?, O, ?> dataFlowOp = typedOp.translateToDataFlow(input);
if (op instanceof UdfOperator<?> ) {
@SuppressWarnings("unchecked")
SingleInputUdfOperator<I, O, ?> udfOp = (SingleInputUdfOperator<I, O, ?>) op;
// set configuration parameters
Configuration opParams = udfOp.getParameters();
if (opParams != null) {
dataFlowOp.getParameters().addAll(opParams);
}
// set the semantic properties
dataFlowOp.setSemanticProperties(udfOp.getSematicProperties());
}
return dataFlowOp;
}
private <I1, I2, O> eu.stratosphere.api.common.operators.DualInputOperator<?, ?, O, ?> translateTwoInputOperator(TwoInputOperator<?, ?, ?, ?> op) {
@SuppressWarnings("unchecked")
TwoInputOperator<I1, I2, O, ?> typedOp = (TwoInputOperator<I1, I2, O, ?>) op;
@SuppressWarnings("unchecked")
DataSet<I1> typedInput1 = (DataSet<I1>) op.getInput1();
@SuppressWarnings("unchecked")
DataSet<I2> typedInput2 = (DataSet<I2>) op.getInput2();
Operator<I1> input1 = translate(typedInput1);
Operator<I2> input2 = translate(typedInput2);
eu.stratosphere.api.common.operators.DualInputOperator<?, ?, O, ?> dataFlowOp = typedOp.translateToDataFlow(input1, input2);
if (op instanceof UdfOperator<?> ) {
@SuppressWarnings("unchecked")
TwoInputUdfOperator<I1, I2, O, ?> udfOp = (TwoInputUdfOperator<I1, I2, O, ?>) op;
// set configuration parameters
Configuration opParams = udfOp.getParameters();
if (opParams != null) {
dataFlowOp.getParameters().addAll(opParams);
}
// set the semantic properties
dataFlowOp.setSemanticProperties(udfOp.getSematicProperties());
}
return dataFlowOp;
}
private <T> BulkIterationBase<T> translateBulkIteration(BulkIterationResultSet<?> untypedIterationEnd) {
@SuppressWarnings("unchecked")
BulkIterationResultSet<T> iterationEnd = (BulkIterationResultSet<T>) untypedIterationEnd;
BulkIterationBase<T> iterationOperator =
new BulkIterationBase<T>(new UnaryOperatorInformation<T, T>(iterationEnd.getType(), iterationEnd.getType()), "Bulk Iteration");
IterativeDataSet<T> iterationHead = iterationEnd.getIterationHead();
translated.put(iterationHead, iterationOperator.getPartialSolution());
Operator<T> translatedBody = translate(iterationEnd.getNextPartialSolution());
iterationOperator.setNextPartialSolution(translatedBody);
iterationOperator.setMaximumNumberOfIterations(iterationHead.getMaxIterations());
iterationOperator.setInput(translate(iterationHead.getInput()));
iterationOperator.getAggregators().addAll(iterationHead.getAggregators());
if(iterationEnd.getTerminationCriterion() != null) {
iterationOperator.setTerminationCriterion(translate(iterationEnd.getTerminationCriterion()));
}
return iterationOperator;
}
private <D, W> DeltaIterationBase<D, W> translateDeltaIteration(DeltaIterationResultSet<?, ?> untypedIterationEnd) {
@SuppressWarnings("unchecked")
DeltaIterationResultSet<D, W> iterationEnd = (DeltaIterationResultSet<D, W>) untypedIterationEnd;
DeltaIteration<D, W> iterationHead = iterationEnd.getIterationHead();
String name = iterationHead.getName() == null ? "Unnamed Delta Iteration" : iterationHead.getName();
DeltaIterationBase<D, W> iterationOperator = new DeltaIterationBase<D, W>(new BinaryOperatorInformation<D, W, D>(iterationEnd.getType(), iterationEnd.getWorksetType(), iterationEnd.getType()),
iterationEnd.getKeyPositions(), name);
iterationOperator.setMaximumNumberOfIterations(iterationEnd.getMaxIterations());
if (iterationHead.getParallelism() > 0) {
iterationOperator.setDegreeOfParallelism(iterationHead.getParallelism());
}
DeltaIteration.SolutionSetPlaceHolder<D> solutionSetPlaceHolder = iterationHead.getSolutionSet();
DeltaIteration.WorksetPlaceHolder<W> worksetPlaceHolder = iterationHead.getWorkset();
translated.put(solutionSetPlaceHolder, iterationOperator.getSolutionSet());
translated.put(worksetPlaceHolder, iterationOperator.getWorkset());
Operator<D> translatedSolutionSet = translate(iterationEnd.getNextSolutionSet());
Operator<W> translatedWorkset = translate(iterationEnd.getNextWorkset());
iterationOperator.setNextWorkset(translatedWorkset);
iterationOperator.setSolutionSetDelta(translatedSolutionSet);
iterationOperator.setInitialSolutionSet(translate(iterationHead.getInitialSolutionSet()));
iterationOperator.setInitialWorkset(translate(iterationHead.getInitialWorkset()));
// register all aggregators
iterationOperator.getAggregators().addAll(iterationHead.getAggregators());
return iterationOperator;
}
private void translateBcVariables(DataSet<?> setOrOp, Operator<?> dataFlowOp) {
// check if this is actually an operator that could have broadcast variables
if (setOrOp instanceof UdfOperator) {
if (!(dataFlowOp instanceof AbstractUdfOperator<?, ?>)) {
throw new RuntimeException("Error while creating the data flow plan for the program: A UDF operation was not translated to a UDF operator.");
}
UdfOperator<?> udfOp = (UdfOperator<?>) setOrOp;
AbstractUdfOperator<?, ?> udfDataFlowOp = (AbstractUdfOperator<?, ?>) dataFlowOp;
for (Map.Entry<String, DataSet<?>> bcVariable : udfOp.getBroadcastSets().entrySet()) {
Operator<?> bcInput = translate(bcVariable.getValue());
udfDataFlowOp.setBroadcastVariable(bcVariable.getKey(), bcInput);
}
}
}
}