package com.github.projectflink.streaming;
import com.github.projectflink.generators.Utils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetAddress;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Streaming job that measures forwarding throughput and latency between a
 * parallel source and a directly attached flatMap operator.
 *
 * <p>Job parameters (read through {@link ParameterTool}):
 * <ul>
 *   <li>{@code words} (required) - number of sentences each source instance pre-generates</li>
 *   <li>{@code latencyFreq} (required) - every n-th emitted record carries a timestamp</li>
 *   <li>{@code logfreq} (required) - throughput is logged every n received records</li>
 *   <li>{@code timeout} (optional) - value passed to {@code setBufferTimeout}</li>
 *   <li>{@code ft} (optional) - value passed to {@code enableCheckpointing}</li>
 * </ul>
 */
public class ForwardThroughput {

    private static final Logger LOG = LoggerFactory.getLogger(ForwardThroughput.class);

    /**
     * Record exchanged between source and flatMap: {@code f0} is the sentence,
     * {@code f1} is the emission timestamp in milliseconds, or {@code 0} when the
     * record carries no latency marker.
     */
    public static class Type extends Tuple2<String, Long> {

        public Type(String value0, Long value2) {
            super(value0, value2);
        }

        /** Creates an empty record. */
        public Type() {
        }
    }

    /**
     * Builds and executes the job.
     *
     * @param args command line arguments, parsed with {@link ParameterTool#fromArgs(String[])}
     * @throws Exception if job construction or execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
        final ParameterTool pt = ParameterTool.fromArgs(args);
        see.getConfig().setGlobalJobParameters(pt);
        // see.getConfig().enableObjectReuse();
        // see.setParallelism(1);
        if (pt.has("timeout")) {
            see.setBufferTimeout(pt.getLong("timeout"));
        }
        if (pt.has("ft")) {
            see.enableCheckpointing(pt.getLong("ft"));
        }

        DataStreamSource<Type> src = see.addSource(new RichParallelSourceFunction<Type>() {
            /** Pre-generated sentences, emitted round-robin by {@link #run}. */
            String[] texts;

            /**
             * Loop flag for {@link #run}. Volatile because {@link #cancel()} may be
             * invoked from a thread other than the one executing {@link #run}.
             */
            volatile boolean running = true;

            /** Pre-generates {@code words} sentences using a fixed random seed. */
            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                final int wordCount = pt.getInt("words");
                if (wordCount <= 0) {
                    throw new IllegalArgumentException(
                            "Parameter 'words' must be positive but was " + wordCount);
                }
                Random rnd = new Random(1337);
                texts = new String[wordCount];
                long totLength = 0;
                for (int i = 0; i < wordCount; i++) {
                    int sentenceLength = rnd.nextInt(25); // 0 to 24 words per sentence
                    StringBuilder sentence = new StringBuilder();
                    for (int s = 0; s < sentenceLength; s++) {
                        // every word is followed by a single space, including the last one
                        sentence.append(Utils.getFastZipfRandomWord()).append(' ');
                    }
                    totLength += sentence.length();
                    texts[i] = sentence.toString();
                }
                LOG.info("Average string length " + (totLength / (double) wordCount));
            }

            /**
             * Emits the pre-generated sentences in an endless cycle until cancelled.
             * Every {@code latencyFreq}-th record carries the current wall-clock time
             * in {@code f1}; all other records carry {@code 0}.
             */
            @Override
            public void run(SourceContext<Type> ctx) throws Exception {
                final int latFreq = pt.getInt("latencyFreq");
                if (latFreq == 0) {
                    throw new IllegalArgumentException("Parameter 'latencyFreq' must not be 0");
                }
                int i = 0;
                long id = 0;
                while (running) {
                    long time = (id++ % latFreq == 0) ? System.currentTimeMillis() : 0L;
                    ctx.collect(new Type(texts[i++], time));
                    if (i == texts.length) {
                        i = 0;
                    }
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        });

        src.flatMap(new RichFlatMapFunction<Type, Integer>() {
            /** Total number of records seen by this operator instance. */
            long received = 0;
            final long logfreq = pt.getInt("logfreq");
            /** Timestamp of the previous throughput log, or -1 before the first one. */
            long lastLog = -1;
            /** Value of {@link #received} at the previous throughput log. */
            long lastElements = 0;
            /** Number of records whose text matched the pattern; counted but never reported. */
            long matches = 0;
            /** Three upper-case letters followed by a dot. */
            private final Pattern threeLetterAbbr = Pattern.compile("[A-Z]{3}\\.");

            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                if (logfreq == 0) {
                    throw new IllegalArgumentException("Parameter 'logfreq' must not be 0");
                }
            }

            /**
             * Runs the regex against the record, logs throughput every {@code logfreq}
             * records and logs latency for records that carry a timestamp. Never emits
             * anything to {@code collector}.
             */
            @Override
            public void flatMap(Type element, Collector<Integer> collector) throws Exception {
                // NOTE(review): matches() requires the WHOLE sentence to match; since the
                // source appends a trailing space to every word this looks like it can never
                // be true. Left unchanged so the measured workload stays the same - confirm
                // whether find() was intended.
                Matcher m = threeLetterAbbr.matcher(element.f0);
                if (m.matches()) {
                    matches++;
                }

                received++;
                if (received % logfreq == 0) {
                    long now = System.currentTimeMillis();
                    if (lastLog != -1) {
                        // throughput for the last "logfreq" elements
                        long timeDiff = now - lastLog;
                        long elementDiff = received - lastElements;
                        double ex = (1000 / (double) timeDiff);
                        LOG.info("During the last {} ms, we received {} elements. That's {} elements/second/core", timeDiff, elementDiff, (long) (elementDiff * ex));
                    }
                    // (re)initialise the measurement window; the first hit only initialises
                    lastLog = now;
                    lastElements = received;
                }

                // a non-zero timestamp marks a latency probe emitted by the source
                if (element.f1 != 0L) {
                    long lat = System.currentTimeMillis() - element.f1;
                    LOG.info("Latency {} ms from same machine", lat);
                }
            }
        });

        see.execute("Forward Throughout "+pt.toMap());
    }
}