/**
* Copyright 2016-2017 Seznam.cz, a.s.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cz.seznam.euphoria.flink;
import cz.seznam.euphoria.core.client.graph.DAG;
import cz.seznam.euphoria.core.client.operator.Operator;
import cz.seznam.euphoria.core.client.operator.PartitioningAware;
import cz.seznam.euphoria.core.executor.FlowUnfolder;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Converts a DAG of Euphoria {@link Operator}s to a Flink-layer specific
 * DAG of {@link FlinkOperator}s. Optimizations may be applied
 * during the conversion.
 */
public class FlowOptimizer {

  /**
   * Upper bound applied to the parallelism derived from input partitions
   * and from operator partitionings. Unbounded by default.
   */
  private int maxParallelism = Integer.MAX_VALUE;

  /**
   * @return the upper bound applied when parallelism is derived from input
   *         partitions or operator partitioning; {@link Integer#MAX_VALUE}
   *         unless changed through {@link #setMaxParallelism(int)}
   */
  public int getMaxParallelism() {
    return maxParallelism;
  }

  /**
   * Sets the upper bound used by {@link #optimize(DAG)} when it derives
   * parallelism from input partitions or operator partitioning.
   *
   * @param maxParallelism the new upper bound; stored as given, no validation
   *                       is performed here
   */
  public void setMaxParallelism(int maxParallelism) {
    this.maxParallelism = maxParallelism;
  }

  /**
   * Wraps every operator of the given DAG into a {@link FlinkOperator}
   * and assigns an explicit parallelism to each of the wrappers.
   *
   * @param dag DAG of Euphoria operators
   * @return DAG of Flink-layer operators with parallelism set
   * @throws IllegalStateException if parallelism of some operator cannot be
   *         determined (it is neither an input operator nor partitioning
   *         aware and it has no parents to inherit from)
   */
  public DAG<FlinkOperator<Operator<?, ?>>> optimize(DAG<Operator<?, ?>> dag) {
    DAG<FlinkOperator<Operator<?, ?>>> flinkDag = convert(dag);
    // setup parallelism
    return setParallelism(flinkDag);
  }

  /**
   * Converts DAG of Euphoria {@link Operator} to Flink-layer specific
   * DAG of {@link FlinkOperator}.
   *
   * @param dag DAG of Euphoria operators
   * @return newly created DAG with the same shape holding the wrappers
   */
  private DAG<FlinkOperator<Operator<?, ?>>> convert(DAG<Operator<?, ?>> dag) {
    @SuppressWarnings("unchecked")
    DAG<FlinkOperator<Operator<?, ?>>> output = DAG.of();

    // mapping between original operator and newly created executor
    // specific wrapper
    final Map<Operator<?, ?>, FlinkOperator<Operator<?, ?>>> mapping = new HashMap<>();

    dag.traverse().forEach(n -> {
      Operator<?, ?> current = n.get();
      FlinkOperator<Operator<?, ?>> created = new FlinkOperator<>(current);
      mapping.put(current, created);

      // NOTE(review): assumes traverse() yields parents before their
      // children; otherwise the lookup below resolves to null - confirm
      List<FlinkOperator<Operator<?, ?>>> parents = n.getParents().stream()
          .map(p -> mapping.get(p.get()))
          .collect(Collectors.toList());

      output.add(created, parents);
    });

    return output;
  }

  /**
   * Modifies given DAG in a way that all operators
   * will have the parallelism explicitly defined.
   * <ul>
   *   <li>input operators use the number of partitions of their source,</li>
   *   <li>partitioning aware operators use their number of partitions,</li>
   *   <li>all other operators take the highest parallelism of their parents.</li>
   * </ul>
   * The first two are capped by {@code maxParallelism}.
   *
   * @param dag Original DAG
   * @return Modified DAG (the very same instance that was passed in)
   * @throws IllegalStateException if an operator falling into the last
   *         category has no parents
   */
  private DAG<FlinkOperator<Operator<?, ?>>>
  setParallelism(DAG<FlinkOperator<Operator<?, ?>>> dag) {
    dag.traverse().forEach(n -> {
      FlinkOperator<Operator<?, ?>> flinkOp = n.get();
      Operator<?, ?> op = flinkOp.getOriginalOperator();

      if (op instanceof FlowUnfolder.InputOperator) {
        int partitions = op.output().getSource().getPartitions().size();
        flinkOp.setParallelism(Math.min(maxParallelism, partitions));
      } else if (op instanceof PartitioningAware) {
        int partitions = ((PartitioningAware) op).getPartitioning().getNumPartitions();
        flinkOp.setParallelism(Math.min(maxParallelism, partitions));
      } else {
        // other operators inherit parallelism from their parents;
        // fail with a message naming the operator instead of the bare
        // "No value present" raised by OptionalInt#getAsInt()
        int inherited = n.getParents().stream()
            .mapToInt(p -> p.get().getParallelism())
            .max()
            .orElseThrow(() -> new IllegalStateException(
                "Unable to determine parallelism of operator " + op
                    + ": it is neither an input nor partitioning aware"
                    + " and has no parents to inherit from"));
        flinkOp.setParallelism(inherited);
      }
    });

    return dag;
  }
}