/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.api.java.operators; import org.apache.flink.annotation.Internal; import org.apache.flink.api.common.InvalidProgramException; import org.apache.flink.api.common.Plan; import org.apache.flink.api.common.operators.AbstractUdfOperator; import org.apache.flink.api.common.operators.BinaryOperatorInformation; import org.apache.flink.api.common.operators.GenericDataSinkBase; import org.apache.flink.api.common.operators.Operator; import org.apache.flink.api.common.operators.UnaryOperatorInformation; import org.apache.flink.api.common.operators.base.BulkIterationBase; import org.apache.flink.api.common.operators.base.DeltaIterationBase; import org.apache.flink.api.java.DataSet; import org.apache.flink.configuration.Configuration; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @Internal public class OperatorTranslation { /** The already translated operations */ private Map<DataSet<?>, Operator<?>> translated = new HashMap<>(); public Plan translateToPlan(List<DataSink<?>> sinks, String jobName) { List<GenericDataSinkBase<?>> planSinks = new ArrayList<>(); for (DataSink<?> sink : sinks) { planSinks.add(translate(sink)); } Plan p = new Plan(planSinks); p.setJobName(jobName); return p; } private <T> GenericDataSinkBase<T> translate(DataSink<T> sink) { // translate the input recursively Operator<T> input = translate(sink.getDataSet()); // translate the sink itself and connect it to the input GenericDataSinkBase<T> translatedSink = sink.translateToDataFlow(input); translatedSink.setResources(sink.getMinResources(), sink.getPreferredResources()); return translatedSink; } private <T> Operator<T> translate(DataSet<T> dataSet) { while (dataSet instanceof NoOpOperator) { dataSet = ((NoOpOperator<T>) dataSet).getInput(); } // check if we have already translated that data set (operation or source) Operator<?> previous = this.translated.get(dataSet); if (previous != null) { // Union operators may only have a single output. // We ensure this by not reusing previously created union operators. // The optimizer will merge subsequent binary unions into one n-ary union. if (!(dataSet instanceof UnionOperator)) { // all other operators are reused. @SuppressWarnings("unchecked") Operator<T> typedPrevious = (Operator<T>) previous; return typedPrevious; } } Operator<T> dataFlowOp; if (dataSet instanceof DataSource) { DataSource<T> dataSource = (DataSource<T>) dataSet; dataFlowOp = dataSource.translateToDataFlow(); dataFlowOp.setResources(dataSource.getMinResources(), dataSource.getPreferredResources()); } else if (dataSet instanceof SingleInputOperator) { SingleInputOperator<?, ?, ?> singleInputOperator = (SingleInputOperator<?, ?, ?>) dataSet; dataFlowOp = translateSingleInputOperator(singleInputOperator); dataFlowOp.setResources(singleInputOperator.getMinResources(), singleInputOperator.getPreferredResources()); } else if (dataSet instanceof TwoInputOperator) { TwoInputOperator<?, ?, ?, ?> twoInputOperator = (TwoInputOperator<?, ?, ?, ?>) dataSet; dataFlowOp = translateTwoInputOperator(twoInputOperator); dataFlowOp.setResources(twoInputOperator.getMinResources(), twoInputOperator.getPreferredResources()); } else if (dataSet instanceof BulkIterationResultSet) { BulkIterationResultSet<?> bulkIterationResultSet = (BulkIterationResultSet<?>) dataSet; dataFlowOp = translateBulkIteration(bulkIterationResultSet); dataFlowOp.setResources(bulkIterationResultSet.getIterationHead().getMinResources(), bulkIterationResultSet.getIterationHead().getPreferredResources()); } else if (dataSet instanceof DeltaIterationResultSet) { DeltaIterationResultSet<?, ?> deltaIterationResultSet = (DeltaIterationResultSet<?, ?>) dataSet; dataFlowOp = translateDeltaIteration(deltaIterationResultSet); dataFlowOp.setResources(deltaIterationResultSet.getIterationHead().getMinResources(), deltaIterationResultSet.getIterationHead().getPreferredResources()); } else if (dataSet instanceof DeltaIteration.SolutionSetPlaceHolder || dataSet instanceof DeltaIteration.WorksetPlaceHolder) { throw new InvalidProgramException("A data set that is part of a delta iteration was used as a sink or action." + " Did you forget to close the iteration?"); } else { throw new RuntimeException("Error while creating the data flow plan for the program: Unknown operator or data set type: " + dataSet); } this.translated.put(dataSet, dataFlowOp); // take care of broadcast variables translateBcVariables(dataSet, dataFlowOp); return dataFlowOp; } private <I, O> org.apache.flink.api.common.operators.Operator<O> translateSingleInputOperator(SingleInputOperator<?, ?, ?> op) { @SuppressWarnings("unchecked") SingleInputOperator<I, O, ?> typedOp = (SingleInputOperator<I, O, ?>) op; @SuppressWarnings("unchecked") DataSet<I> typedInput = (DataSet<I>) op.getInput(); Operator<I> input = translate(typedInput); org.apache.flink.api.common.operators.Operator<O> dataFlowOp = typedOp.translateToDataFlow(input); if (op instanceof UdfOperator<?>) { @SuppressWarnings("unchecked") SingleInputUdfOperator<I, O, ?> udfOp = (SingleInputUdfOperator<I, O, ?>) op; // set configuration parameters Configuration opParams = udfOp.getParameters(); if (opParams != null) { dataFlowOp.getParameters().addAll(opParams); } if (dataFlowOp instanceof org.apache.flink.api.common.operators.SingleInputOperator) { org.apache.flink.api.common.operators.SingleInputOperator<?, O, ?> unaryOp = (org.apache.flink.api.common.operators.SingleInputOperator<?, O, ?>) dataFlowOp; // set the semantic properties unaryOp.setSemanticProperties(udfOp.getSemanticProperties()); } } return dataFlowOp; } private <I1, I2, O> org.apache.flink.api.common.operators.Operator<O> translateTwoInputOperator(TwoInputOperator<?, ?, ?, ?> op) { @SuppressWarnings("unchecked") TwoInputOperator<I1, I2, O, ?> typedOp = (TwoInputOperator<I1, I2, O, ?>) op; @SuppressWarnings("unchecked") DataSet<I1> typedInput1 = (DataSet<I1>) op.getInput1(); @SuppressWarnings("unchecked") DataSet<I2> typedInput2 = (DataSet<I2>) op.getInput2(); Operator<I1> input1 = translate(typedInput1); Operator<I2> input2 = translate(typedInput2); org.apache.flink.api.common.operators.Operator<O> dataFlowOp = typedOp.translateToDataFlow(input1, input2); if (op instanceof UdfOperator<?> ) { @SuppressWarnings("unchecked") TwoInputUdfOperator<I1, I2, O, ?> udfOp = (TwoInputUdfOperator<I1, I2, O, ?>) op; // set configuration parameters Configuration opParams = udfOp.getParameters(); if (opParams != null) { dataFlowOp.getParameters().addAll(opParams); } if (dataFlowOp instanceof org.apache.flink.api.common.operators.DualInputOperator) { org.apache.flink.api.common.operators.DualInputOperator<?, ?, O, ?> binaryOp = (org.apache.flink.api.common.operators.DualInputOperator<?, ?, O, ?>) dataFlowOp; // set the semantic properties binaryOp.setSemanticProperties(udfOp.getSemanticProperties()); } } return dataFlowOp; } private <T> BulkIterationBase<T> translateBulkIteration(BulkIterationResultSet<?> untypedIterationEnd) { @SuppressWarnings("unchecked") BulkIterationResultSet<T> iterationEnd = (BulkIterationResultSet<T>) untypedIterationEnd; BulkIterationBase<T> iterationOperator = new BulkIterationBase<>(new UnaryOperatorInformation<>(iterationEnd.getType(), iterationEnd.getType()), "Bulk Iteration"); IterativeDataSet<T> iterationHead = iterationEnd.getIterationHead(); translated.put(iterationHead, iterationOperator.getPartialSolution()); Operator<T> translatedBody = translate(iterationEnd.getNextPartialSolution()); iterationOperator.setNextPartialSolution(translatedBody); iterationOperator.setMaximumNumberOfIterations(iterationHead.getMaxIterations()); iterationOperator.setInput(translate(iterationHead.getInput())); iterationOperator.getAggregators().addAll(iterationHead.getAggregators()); if(iterationEnd.getTerminationCriterion() != null) { iterationOperator.setTerminationCriterion(translate(iterationEnd.getTerminationCriterion())); } return iterationOperator; } private <D, W> DeltaIterationBase<D, W> translateDeltaIteration(DeltaIterationResultSet<?, ?> untypedIterationEnd) { @SuppressWarnings("unchecked") DeltaIterationResultSet<D, W> iterationEnd = (DeltaIterationResultSet<D, W>) untypedIterationEnd; DeltaIteration<D, W> iterationHead = iterationEnd.getIterationHead(); String name = iterationHead.getName() == null ? "Unnamed Delta Iteration" : iterationHead.getName(); DeltaIterationBase<D, W> iterationOperator = new DeltaIterationBase<>(new BinaryOperatorInformation<>(iterationEnd.getType(), iterationEnd.getWorksetType(), iterationEnd.getType()), iterationEnd.getKeyPositions(), name); iterationOperator.setMaximumNumberOfIterations(iterationEnd.getMaxIterations()); if (iterationHead.getParallelism() > 0) { iterationOperator.setParallelism(iterationHead.getParallelism()); } DeltaIteration.SolutionSetPlaceHolder<D> solutionSetPlaceHolder = iterationHead.getSolutionSet(); DeltaIteration.WorksetPlaceHolder<W> worksetPlaceHolder = iterationHead.getWorkset(); translated.put(solutionSetPlaceHolder, iterationOperator.getSolutionSet()); translated.put(worksetPlaceHolder, iterationOperator.getWorkset()); Operator<D> translatedSolutionSet = translate(iterationEnd.getNextSolutionSet()); Operator<W> translatedWorkset = translate(iterationEnd.getNextWorkset()); iterationOperator.setNextWorkset(translatedWorkset); iterationOperator.setSolutionSetDelta(translatedSolutionSet); iterationOperator.setInitialSolutionSet(translate(iterationHead.getInitialSolutionSet())); iterationOperator.setInitialWorkset(translate(iterationHead.getInitialWorkset())); // register all aggregators iterationOperator.getAggregators().addAll(iterationHead.getAggregators()); iterationOperator.setSolutionSetUnManaged(iterationHead.isSolutionSetUnManaged()); return iterationOperator; } private void translateBcVariables(DataSet<?> setOrOp, Operator<?> dataFlowOp) { // check if this is actually an operator that could have broadcast variables if (setOrOp instanceof UdfOperator) { if (!(dataFlowOp instanceof AbstractUdfOperator<?, ?>)) { throw new RuntimeException("Error while creating the data flow plan for the program: A UDF operation was not translated to a UDF operator."); } UdfOperator<?> udfOp = (UdfOperator<?>) setOrOp; AbstractUdfOperator<?, ?> udfDataFlowOp = (AbstractUdfOperator<?, ?>) dataFlowOp; for (Map.Entry<String, DataSet<?>> bcVariable : udfOp.getBroadcastSets().entrySet()) { Operator<?> bcInput = translate(bcVariable.getValue()); udfDataFlowOp.setBroadcastVariable(bcVariable.getKey(), bcInput); } } } }