package com.github.projectflink.streaming;

import com.dataartisans.flink.example.eventpattern.Event;
import com.dataartisans.flink.example.eventpattern.kafka.EventDeSerializer;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
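
// A minimal invocation sketch (hypothetical values; only "topic" and "logfreq" are
// read explicitly below, every other argument is forwarded to the Kafka consumer
// via pt.getProperties(), e.g. the standard "bootstrap.servers" and "group.id"
// consumer settings):
//
//   bin/flink run -c com.github.projectflink.streaming.FlinkKafkaReader <jar> \
//     --topic events --logfreq 100000 \
//     --bootstrap.servers localhost:9092 --group.id flink-kafka-reader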

/**
 * Minimalistic Flink Kafka reader to measure read performance (similar to the
 * standalone Kafka reader).
 */
public class FlinkKafkaReader {

    private static final Logger LOG = LoggerFactory.getLogger(FlinkKafkaReader.class);

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
        final ParameterTool pt = ParameterTool.fromArgs(args);
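
        // This assumes the "universal" FlinkKafkaConsumer from flink-connector-kafka;
        // older Flink/Kafka setups may need a version-specific variant such as
        // FlinkKafkaConsumer08 instead.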
        DataStreamSource<Event> src = see.addSource(
                new FlinkKafkaConsumer<Event>(pt.getRequired("topic"), new EventDeSerializer(), pt.getProperties()));

        src.flatMap(new FlatMapFunction<Event, Integer>() {

            private long received = 0;
            private final int logfreq = pt.getInt("logfreq");
            private long lastLog = -1;
            private long lastElements = 0;
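
            // The counters above live in each parallel subtask, which is why the
            // log line below reports "elements/second/core".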

            @Override
            public void flatMap(Event event, Collector<Integer> collector) throws Exception {
                // This job only counts elements to measure read throughput; nothing is emitted.
                received++;
                if (received % logfreq == 0) {
                    long now = System.currentTimeMillis();
                    if (lastLog == -1) {
                        // first measurement point: just initialize the window
                        lastLog = now;
                        lastElements = received;
                    } else {
                        // throughput for the last "logfreq" elements, extrapolated to one second
                        long timeDiff = now - lastLog;
                        long elementDiff = received - lastElements;
                        double ex = (1000 / (double) timeDiff);
                        LOG.info("During the last {} ms, we received {} elements. That's {} elements/second/core. Total read {}",
                                timeDiff, elementDiff, elementDiff * ex, received);
                        // reset the window
                        lastLog = now;
                        lastElements = received;
                    }
                }
            }
        });

        see.execute();
    }
}