package com.github.projectflink.streaming;
import com.dataartisans.flink.example.eventpattern.Event;
import com.dataartisans.flink.example.eventpattern.EventsGenerator;
import com.dataartisans.flink.example.eventpattern.StateMachineMapper;
import com.dataartisans.flink.example.eventpattern.kafka.EventDeSerializer;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Properties;
/**
 * Flink streaming job that generates {@link Event}s with an {@link EventsGenerator}
 * and writes them to a Kafka topic.
 *
 * <p>Command line parameters (parsed with {@link ParameterTool}):
 * <ul>
 *   <li>{@code p} - optional; job parallelism</li>
 *   <li>{@code logfreq} - log a throughput line every {@code logfreq} generated elements
 *       (0 disables throughput logging)</li>
 *   <li>{@code delay} - milliseconds to sleep when throttling; values &lt;= 0 disable throttling</li>
 *   <li>{@code sleepFreq} - sleep once every {@code sleepFreq} generated elements
 *       (0 disables throttling)</li>
 *   <li>{@code topic} - required; Kafka topic the events are written to</li>
 * </ul>
 * All parameters are additionally handed to the Kafka producer as {@link Properties}.
 */
public class KafkaGenerator {

    private static final Logger LOG = LoggerFactory.getLogger(KafkaGenerator.class);

    /**
     * Builds and executes the generator job.
     *
     * @param args command line arguments, see the class documentation for the supported keys
     * @throws Exception if the job cannot be set up or its execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
        final ParameterTool pt = ParameterTool.fromArgs(args);
        see.getConfig().setGlobalJobParameters(pt);
        if (pt.has("p")) {
            see.setParallelism(pt.getInt("p"));
        }

        DataStreamSource<Event> src = see.addSource(new RichParallelSourceFunction<Event>() {
            private static final long serialVersionUID = 1L;

            // Bounds handed to EventsGenerator#next; each parallel subtask gets its own slice.
            int min;
            int max;
            EventsGenerator eg;

            // Written by cancel() and read by the loop in run(); volatile so that a cancel
            // request issued from another thread becomes visible to the generator loop.
            volatile boolean running = true;

            /**
             * Splits the range [0, Integer.MAX_VALUE) evenly across the parallel subtasks
             * and creates the generator for this subtask.
             */
            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                int range = Integer.MAX_VALUE / getRuntimeContext().getNumberOfParallelSubtasks();
                this.min = range * getRuntimeContext().getIndexOfThisSubtask();
                this.max = min + range;
                eg = new EventsGenerator();
                LOG.info("Creating new EventsGenerator from {} to {}", min, max);
            }

            /**
             * Emits generated events until the source is cancelled or the thread is
             * interrupted while throttling, periodically logging the observed throughput.
             */
            @Override
            public void run(SourceContext<Event> sourceContext) throws Exception {
                long generated = 0;
                long logfreq = pt.getInt("logfreq");
                long lastLog = -1;
                long lastElements = 0;
                int delay = pt.getInt("delay");
                int sleepFreq = pt.getInt("sleepFreq");

                while (running) {
                    // Throttle: sleep "delay" ms once every "sleepFreq" elements.
                    // A sleepFreq of 0 would make the modulo throw, so it disables throttling.
                    if (delay > 0 && sleepFreq != 0 && generated % sleepFreq == 0) {
                        try {
                            Thread.sleep(delay);
                        } catch (InterruptedException e) {
                            // Restore the interrupt status for the caller and stop generating
                            // instead of swallowing the interruption and looping on.
                            Thread.currentThread().interrupt();
                            return;
                        }
                    }

                    Event gen = eg.next(min, max);
                    sourceContext.collect(gen);
                    generated++;

                    // A logfreq of 0 would make the modulo throw, so it disables logging.
                    if (logfreq != 0 && generated % logfreq == 0) {
                        long now = System.currentTimeMillis();
                        if (lastLog != -1) {
                            // throughput for the elements generated since the previous log line
                            long timeDiff = now - lastLog;
                            long elementDiff = generated - lastElements;
                            double ex = (1000 / (double) timeDiff);
                            LOG.info("During the last {} ms, we generated {} elements. That's {} elements/second/core", timeDiff, elementDiff, elementDiff * ex);
                        }
                        // (re)initialize the measurement window; the very first hit only initializes
                        lastLog = now;
                        lastElements = generated;
                    }
                }
            }

            /** Requests the generator loop in {@link #run} to terminate. */
            @Override
            public void cancel() {
                running = false;
            }
        });

        // run the state machine here as well to see illegal transitions
        src.partitionByHash(new KeySelector<Event, Integer>() {
            private static final long serialVersionUID = 1L;

            @Override
            public Integer getKey(Event event) throws Exception {
                return event.sourceAddress();
            }
        }).flatMap(new StateMachineMapper(pt));

        // write the generated events into Kafka; all command line parameters are
        // forwarded to the producer as its configuration
        Properties props = pt.getProperties();
        src.addSink(new FlinkKafkaProducer<Event>(pt.getRequired("topic"), new EventDeSerializer(), props));

        see.execute();
    }
}