/*
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.storm.api;
import org.apache.storm.generated.ComponentCommon;
import org.apache.storm.generated.GlobalStreamId;
import org.apache.storm.generated.Grouping;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.IRichSpout;
import org.apache.storm.topology.IRichStateSpout;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.storm.util.SplitStreamMapper;
import org.apache.flink.storm.util.SplitStreamType;
import org.apache.flink.storm.util.StormStreamSelector;
import org.apache.flink.storm.wrappers.BoltWrapper;
import org.apache.flink.storm.wrappers.MergedInputsBoltWrapper;
import org.apache.flink.storm.wrappers.SpoutWrapper;
import org.apache.flink.storm.wrappers.StormTuple;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.datastream.SplitStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.InstantiationUtil;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
* {@link FlinkTopology} translates a {@link TopologyBuilder} to a Flink program.
* <strong>CAUTION: {@link IRichStateSpout StateSpout}s are currently not supported.</strong>
*/
public class FlinkTopology {
/** All declared streams and output schemas by operator ID */
private final HashMap<String, HashMap<String, Fields>> outputStreams = new HashMap<String, HashMap<String, Fields>>();
/** All spouts&bolts declarers by their ID */
private final HashMap<String, FlinkOutputFieldsDeclarer> declarers = new HashMap<String, FlinkOutputFieldsDeclarer>();
private final HashMap<String, Set<Entry<GlobalStreamId, Grouping>>> unprocessdInputsPerBolt =
new HashMap<String, Set<Entry<GlobalStreamId, Grouping>>>();
final HashMap<String, HashMap<String, DataStream<Tuple>>> availableInputs = new HashMap<>();
private final TopologyBuilder builder;
// needs to be a class member for internal testing purpose
private final StormTopology stormTopology;
private final Map<String, IRichSpout> spouts;
private final Map<String, IRichBolt> bolts;
private final StreamExecutionEnvironment env;
private FlinkTopology(TopologyBuilder builder) {
this.builder = builder;
this.stormTopology = builder.createTopology();
// extract the spouts and bolts
this.spouts = getPrivateField("_spouts");
this.bolts = getPrivateField("_bolts");
this.env = StreamExecutionEnvironment.getExecutionEnvironment();
// Kick off the translation immediately
translateTopology();
}
/**
*
* Creates a Flink program that uses the specified spouts and bolts.
* @param stormBuilder The Storm topology builder to use for creating the Flink topology.
* @return A {@link FlinkTopology} which contains the translated Storm topology and may be executed.
*/
public static FlinkTopology createTopology(TopologyBuilder stormBuilder) {
return new FlinkTopology(stormBuilder);
}
/**
* Returns the underlying Flink {@link StreamExecutionEnvironment} for the Storm topology.
* @return The contextual environment (local or remote).
*/
public StreamExecutionEnvironment getExecutionEnvironment() {
return this.env;
}
/**
* Directly executes the Storm topology based on the current context (local when in IDE and
* remote when executed through ./bin/flink).
* @return The Flink {@link JobExecutionResult} after the execution of the Storm topology.
* @throws Exception which occurs during execution of the translated Storm topology.
*/
public JobExecutionResult execute() throws Exception {
return env.execute();
}
@SuppressWarnings("unchecked")
private <T> Map<String, T> getPrivateField(String field) {
try {
Field f = builder.getClass().getDeclaredField(field);
f.setAccessible(true);
return copyObject((Map<String, T>) f.get(builder));
} catch (NoSuchFieldException | IllegalAccessException e) {
throw new RuntimeException("Couldn't get " + field + " from TopologyBuilder", e);
}
}
private <T> T copyObject(T object) {
try {
return InstantiationUtil.deserializeObject(
InstantiationUtil.serializeObject(object),
getClass().getClassLoader()
);
} catch (IOException | ClassNotFoundException e) {
throw new RuntimeException("Failed to copy object.", e);
}
}
/**
* Creates a Flink program that uses the specified spouts and bolts.
*/
private void translateTopology() {
unprocessdInputsPerBolt.clear();
outputStreams.clear();
declarers.clear();
availableInputs.clear();
// Storm defaults to parallelism 1
env.setParallelism(1);
/* Translation of topology */
for (final Entry<String, IRichSpout> spout : spouts.entrySet()) {
final String spoutId = spout.getKey();
final IRichSpout userSpout = spout.getValue();
final FlinkOutputFieldsDeclarer declarer = new FlinkOutputFieldsDeclarer();
userSpout.declareOutputFields(declarer);
final HashMap<String,Fields> sourceStreams = declarer.outputStreams;
this.outputStreams.put(spoutId, sourceStreams);
declarers.put(spoutId, declarer);
final HashMap<String, DataStream<Tuple>> outputStreams = new HashMap<String, DataStream<Tuple>>();
final DataStreamSource<?> source;
if (sourceStreams.size() == 1) {
final SpoutWrapper<Tuple> spoutWrapperSingleOutput = new SpoutWrapper<Tuple>(userSpout, spoutId, null, null);
spoutWrapperSingleOutput.setStormTopology(stormTopology);
final String outputStreamId = (String) sourceStreams.keySet().toArray()[0];
DataStreamSource<Tuple> src = env.addSource(spoutWrapperSingleOutput, spoutId,
declarer.getOutputType(outputStreamId));
outputStreams.put(outputStreamId, src);
source = src;
} else {
final SpoutWrapper<SplitStreamType<Tuple>> spoutWrapperMultipleOutputs = new SpoutWrapper<SplitStreamType<Tuple>>(
userSpout, spoutId, null, null);
spoutWrapperMultipleOutputs.setStormTopology(stormTopology);
@SuppressWarnings({ "unchecked", "rawtypes" })
DataStreamSource<SplitStreamType<Tuple>> multiSource = env.addSource(
spoutWrapperMultipleOutputs, spoutId,
(TypeInformation) TypeExtractor.getForClass(SplitStreamType.class));
SplitStream<SplitStreamType<Tuple>> splitSource = multiSource
.split(new StormStreamSelector<Tuple>());
for (String streamId : sourceStreams.keySet()) {
SingleOutputStreamOperator<Tuple> outStream = splitSource.select(streamId)
.map(new SplitStreamMapper<Tuple>());
outStream.getTransformation().setOutputType(declarer.getOutputType(streamId));
outputStreams.put(streamId, outStream);
}
source = multiSource;
}
availableInputs.put(spoutId, outputStreams);
final ComponentCommon common = stormTopology.get_spouts().get(spoutId).get_common();
if (common.is_set_parallelism_hint()) {
int dop = common.get_parallelism_hint();
source.setParallelism(dop);
} else {
common.set_parallelism_hint(1);
}
}
/**
* 1. Connect all spout streams with bolts streams
* 2. Then proceed with the bolts stream already connected
*
* Because we do not know the order in which an iterator steps over a set, we might process a consumer before
* its producer
* ->thus, we might need to repeat multiple times
*/
boolean makeProgress = true;
while (bolts.size() > 0) {
if (!makeProgress) {
StringBuilder strBld = new StringBuilder();
strBld.append("Unable to build Topology. Could not connect the following bolts:");
for (String boltId : bolts.keySet()) {
strBld.append("\n ");
strBld.append(boltId);
strBld.append(": missing input streams [");
for (Entry<GlobalStreamId, Grouping> streams : unprocessdInputsPerBolt
.get(boltId)) {
strBld.append("'");
strBld.append(streams.getKey().get_streamId());
strBld.append("' from '");
strBld.append(streams.getKey().get_componentId());
strBld.append("'; ");
}
strBld.append("]");
}
throw new RuntimeException(strBld.toString());
}
makeProgress = false;
final Iterator<Entry<String, IRichBolt>> boltsIterator = bolts.entrySet().iterator();
while (boltsIterator.hasNext()) {
final Entry<String, IRichBolt> bolt = boltsIterator.next();
final String boltId = bolt.getKey();
final IRichBolt userBolt = copyObject(bolt.getValue());
final ComponentCommon common = stormTopology.get_bolts().get(boltId).get_common();
Set<Entry<GlobalStreamId, Grouping>> unprocessedBoltInputs = unprocessdInputsPerBolt.get(boltId);
if (unprocessedBoltInputs == null) {
unprocessedBoltInputs = new HashSet<>();
unprocessedBoltInputs.addAll(common.get_inputs().entrySet());
unprocessdInputsPerBolt.put(boltId, unprocessedBoltInputs);
}
// check if all inputs are available
final int numberOfInputs = unprocessedBoltInputs.size();
int inputsAvailable = 0;
for (Entry<GlobalStreamId, Grouping> entry : unprocessedBoltInputs) {
final String producerId = entry.getKey().get_componentId();
final String streamId = entry.getKey().get_streamId();
final HashMap<String, DataStream<Tuple>> streams = availableInputs.get(producerId);
if (streams != null && streams.get(streamId) != null) {
inputsAvailable++;
}
}
if (inputsAvailable != numberOfInputs) {
// traverse other bolts first until inputs are available
continue;
} else {
makeProgress = true;
boltsIterator.remove();
}
final Map<GlobalStreamId, DataStream<Tuple>> inputStreams = new HashMap<>(numberOfInputs);
for (Entry<GlobalStreamId, Grouping> input : unprocessedBoltInputs) {
final GlobalStreamId streamId = input.getKey();
final Grouping grouping = input.getValue();
final String producerId = streamId.get_componentId();
final Map<String, DataStream<Tuple>> producer = availableInputs.get(producerId);
inputStreams.put(streamId, processInput(boltId, userBolt, streamId, grouping, producer));
}
final SingleOutputStreamOperator<?> outputStream = createOutput(boltId,
userBolt, inputStreams);
if (common.is_set_parallelism_hint()) {
int dop = common.get_parallelism_hint();
outputStream.setParallelism(dop);
} else {
common.set_parallelism_hint(1);
}
}
}
}
private DataStream<Tuple> processInput(String boltId, IRichBolt userBolt,
GlobalStreamId streamId, Grouping grouping,
Map<String, DataStream<Tuple>> producer) {
assert (userBolt != null);
assert (boltId != null);
assert (streamId != null);
assert (grouping != null);
assert (producer != null);
final String producerId = streamId.get_componentId();
final String inputStreamId = streamId.get_streamId();
DataStream<Tuple> inputStream = producer.get(inputStreamId);
final FlinkOutputFieldsDeclarer declarer = new FlinkOutputFieldsDeclarer();
declarers.put(boltId, declarer);
userBolt.declareOutputFields(declarer);
this.outputStreams.put(boltId, declarer.outputStreams);
// if producer was processed already
if (grouping.is_set_shuffle()) {
// Storm uses a round-robin shuffle strategy
inputStream = inputStream.rebalance();
} else if (grouping.is_set_fields()) {
// global grouping is emulated in Storm via an empty fields grouping list
final List<String> fields = grouping.get_fields();
if (fields.size() > 0) {
FlinkOutputFieldsDeclarer prodDeclarer = this.declarers.get(producerId);
inputStream = inputStream.keyBy(prodDeclarer
.getGroupingFieldIndexes(inputStreamId,
grouping.get_fields()));
} else {
inputStream = inputStream.global();
}
} else if (grouping.is_set_all()) {
inputStream = inputStream.broadcast();
} else if (!grouping.is_set_local_or_shuffle()) {
throw new UnsupportedOperationException(
"Flink only supports (local-or-)shuffle, fields, all, and global grouping");
}
return inputStream;
}
@SuppressWarnings({ "unchecked", "rawtypes" })
private SingleOutputStreamOperator<?> createOutput(String boltId, IRichBolt bolt,
Map<GlobalStreamId, DataStream<Tuple>> inputStreams) {
assert (boltId != null);
assert (bolt != null);
assert (inputStreams != null);
Iterator<Entry<GlobalStreamId, DataStream<Tuple>>> iterator = inputStreams.entrySet()
.iterator();
Entry<GlobalStreamId, DataStream<Tuple>> input1 = iterator.next();
GlobalStreamId streamId1 = input1.getKey();
String inputStreamId1 = streamId1.get_streamId();
String inputComponentId1 = streamId1.get_componentId();
Fields inputSchema1 = this.outputStreams.get(inputComponentId1).get(inputStreamId1);
DataStream<Tuple> singleInputStream = input1.getValue();
DataStream<StormTuple<Tuple>> mergedInputStream = null;
while (iterator.hasNext()) {
Entry<GlobalStreamId, DataStream<Tuple>> input2 = iterator.next();
GlobalStreamId streamId2 = input2.getKey();
DataStream<Tuple> inputStream2 = input2.getValue();
if (mergedInputStream == null) {
mergedInputStream = singleInputStream
.connect(inputStream2)
.flatMap(
new TwoFlinkStreamsMerger(streamId1, inputSchema1,
streamId2, this.outputStreams.get(
streamId2.get_componentId()).get(
streamId2.get_streamId())))
.returns(StormTuple.class);
} else {
mergedInputStream = mergedInputStream
.connect(inputStream2)
.flatMap(
new StormFlinkStreamMerger(streamId2, this.outputStreams.get(
streamId2.get_componentId()).get(streamId2.get_streamId())))
.returns(StormTuple.class);
}
}
final HashMap<String, Fields> boltOutputs = this.outputStreams.get(boltId);
final FlinkOutputFieldsDeclarer declarer = this.declarers.get(boltId);
final SingleOutputStreamOperator<?> outputStream;
if (boltOutputs.size() < 2) { // single output stream or sink
String outputStreamId;
if (boltOutputs.size() == 1) {
outputStreamId = (String) boltOutputs.keySet().toArray()[0];
} else {
outputStreamId = null;
}
final TypeInformation<Tuple> outType = declarer.getOutputType(outputStreamId);
final SingleOutputStreamOperator<Tuple> outStream;
// only one input
if(inputStreams.entrySet().size() == 1) {
BoltWrapper<Tuple, Tuple> boltWrapper = new BoltWrapper<>(bolt, boltId,
inputStreamId1, inputComponentId1, inputSchema1, null);
boltWrapper.setStormTopology(stormTopology);
outStream = singleInputStream.transform(boltId, outType, boltWrapper);
} else {
MergedInputsBoltWrapper<Tuple, Tuple> boltWrapper = new MergedInputsBoltWrapper<Tuple, Tuple>(
bolt, boltId, null);
boltWrapper.setStormTopology(stormTopology);
outStream = mergedInputStream.transform(boltId, outType, boltWrapper);
}
if (outType != null) {
// only for non-sink nodes
final HashMap<String, DataStream<Tuple>> op = new HashMap<>();
op.put(outputStreamId, outStream);
availableInputs.put(boltId, op);
}
outputStream = outStream;
} else {
final TypeInformation<SplitStreamType<Tuple>> outType = (TypeInformation) TypeExtractor
.getForClass(SplitStreamType.class);
final SingleOutputStreamOperator<SplitStreamType<Tuple>> multiStream;
// only one input
if(inputStreams.entrySet().size() == 1) {
final BoltWrapper<Tuple, SplitStreamType<Tuple>> boltWrapperMultipleOutputs = new BoltWrapper<>(
bolt, boltId, inputStreamId1, inputComponentId1, inputSchema1, null);
boltWrapperMultipleOutputs.setStormTopology(stormTopology);
multiStream = singleInputStream.transform(boltId, outType, boltWrapperMultipleOutputs);
} else {
final MergedInputsBoltWrapper<Tuple, SplitStreamType<Tuple>> boltWrapperMultipleOutputs = new MergedInputsBoltWrapper<Tuple, SplitStreamType<Tuple>>(
bolt, boltId, null);
boltWrapperMultipleOutputs.setStormTopology(stormTopology);
multiStream = mergedInputStream.transform(boltId, outType, boltWrapperMultipleOutputs);
}
final SplitStream<SplitStreamType<Tuple>> splitStream = multiStream
.split(new StormStreamSelector<Tuple>());
final HashMap<String, DataStream<Tuple>> op = new HashMap<>();
for (String outputStreamId : boltOutputs.keySet()) {
op.put(outputStreamId,
splitStream.select(outputStreamId).map(
new SplitStreamMapper<Tuple>()));
SingleOutputStreamOperator<Tuple> outStream = splitStream
.select(outputStreamId).map(new SplitStreamMapper<Tuple>());
outStream.getTransformation().setOutputType(declarer.getOutputType(outputStreamId));
op.put(outputStreamId, outStream);
}
availableInputs.put(boltId, op);
outputStream = multiStream;
}
return outputStream;
}
// for internal testing purpose only
public StormTopology getStormTopology() {
return this.stormTopology;
}
}