package experiments;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import com.github.projectflink.generators.Utils;
import org.apache.flink.api.java.utils.ParameterTool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.operation.Function;
import storm.trident.operation.TridentCollector;
import storm.trident.operation.TridentOperationContext;
import storm.trident.spout.IBatchSpout;
import storm.trident.tuple.TridentTuple;

import java.net.UnknownHostException;
import java.util.Map;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Trident topology that forwards generated text tuples from a batch spout ({@link Generator})
 * to a measuring function ({@link Sink}) and logs throughput and latency.
 *
 * <p>Args for local:
 *
 * <pre>
 * --para 4 --sourceParallelism 2 --sinkParallelism 2 --logfreq 100000 --words 2000 --delay 0 --sleepFreq 0 --latencyFreq 100000 --local
 * </pre>
 *
 * <p>Arguments read by this class: {@code para}, {@code sourceParallelism}, {@code logfreq},
 * {@code words}, {@code delay}, {@code sleepFreq}, {@code latencyFreq} (all required),
 * {@code batchSize} (default 1000), {@code name} (default {@code no_name}), and the flags
 * {@code local} and {@code ft}.
 */
public class TridentForwardThroughput {

	public static final Logger LOG = LoggerFactory.getLogger(TridentForwardThroughput.class);

	/** Output fields of the generator: the text and the emission timestamp (0 = no timestamp). */
	public static final Fields FIELDS = new Fields("string", "time");

	/** How long the local cluster is kept running before it is shut down. */
	private static final long LOCAL_RUN_MILLIS = 300000L;

	/**
	 * Batch spout that emits pre-generated texts. The first and the last tuple of every batch
	 * carry the current wall-clock time; all other tuples carry 0 in the "time" field.
	 */
	public static class Generator implements IBatchSpout {

		private final int delay;
		// Not used by the generator; still read in the constructor so that the
		// "latencyFreq" argument stays required, as before.
		private final int latFreq;
		private final ParameterTool pt;
		private final int sleepFreq;
		private final int batchSize;
		// Number of tuples emitted so far by this instance; drives the throttling.
		private long id = 0;
		private String[] texts;

		/**
		 * @param pt parsed command line; must contain "delay", "latencyFreq" and "sleepFreq",
		 *           may contain "batchSize" (default 1000)
		 */
		public Generator(ParameterTool pt) {
			this.delay = pt.getInt("delay");
			this.latFreq = pt.getInt("latencyFreq");
			this.sleepFreq = pt.getInt("sleepFreq");
			this.pt = pt;
			this.batchSize = pt.getInt("batchSize", 1000);
		}

		/**
		 * Pre-generates the {@code words} texts that are emitted round-robin by
		 * {@link #emitBatch(long, TridentCollector)} and logs their average length.
		 */
		@Override
		public void open(Map conf, TopologyContext context) {
			final int numTexts = pt.getInt("words");
			// Fixed seed: the sequence of sentence lengths is the same on every run.
			Random rnd = new Random(1337);
			texts = new String[numTexts];
			int totLength = 0;
			for (int t = 0; t < numTexts; t++) {
				int sentenceLength = rnd.nextInt(25); // 0 to 24 words per sentence
				StringBuilder sentence = new StringBuilder();
				for (int s = 0; s < sentenceLength; s++) {
					// presumably a Zipf-distributed random word; see Utils for the exact semantics
					sentence.append(Utils.getFastZipfRandomWord()).append(' ');
				}
				texts[t] = sentence.toString();
				totLength += texts[t].length();
			}
			LOG.info("Average string length {}", totLength / (double) numTexts);
		}

		/**
		 * Emits {@code batchSize} tuples. Every batch starts again at the first text and wraps
		 * around when the end of the text array is reached.
		 *
		 * <p>If {@code delay > 0} the spout sleeps {@code delay} ms before every
		 * {@code sleepFreq}-th tuple. A {@code sleepFreq} of 0 disables throttling (it used to
		 * fail with a division by zero).
		 */
		@Override
		public void emitBatch(long batchId, TridentCollector collector) {
			int textIndex = 0;
			for (int n = 0; n < batchSize; n++) {
				if (delay > 0 && sleepFreq != 0 && id % sleepFreq == 0) {
					try {
						Thread.sleep(delay);
					} catch (InterruptedException e) {
						// Keep the interrupt visible to the caller instead of swallowing it.
						Thread.currentThread().interrupt();
						LOG.warn("Interrupted while throttling the generator", e);
					}
				}
				// send time at beginning and end of batch, 0 for every tuple in between
				boolean batchBoundary = (n == 0) || (n + 1 == batchSize);
				long timestamp = batchBoundary ? System.currentTimeMillis() : 0L;

				collector.emit(new Values(texts[textIndex], timestamp));
				if (++textIndex == texts.length) {
					textIndex = 0;
				}
				id++;
			}
		}

		@Override
		public void ack(long batchId) {
		}

		@Override
		public void close() {
		}

		@Override
		public Map getComponentConfiguration() {
			return null;
		}

		@Override
		public Fields getOutputFields() {
			return FIELDS;
		}
	}

	/**
	 * Trident function that counts received tuples, logs the throughput every
	 * {@code logfreq} tuples and logs the latency of every tuple that carries a timestamp.
	 * It never emits anything.
	 */
	public static class Sink implements Function {

		// Currently not evaluated anywhere in this class.
		private final boolean withFT;
		long received = 0;
		// Wall-clock time of the first received tuple; recorded but not read in this class.
		long start = 0;
		ParameterTool pt;
		private long logfreq;
		private long lastLog = -1;
		private long lastElements;
		private long matches = 0;
		private final Pattern threeDigitAbbr = Pattern.compile("[A-Z]{3}\\.");

		/**
		 * @param pt parsed command line; must contain "logfreq", may contain the flag "ft"
		 * @throws UnknownHostException never thrown by the current implementation; kept in the
		 *                              signature for callers that already handle it
		 */
		public Sink(ParameterTool pt) throws UnknownHostException {
			this.pt = pt;
			this.withFT = pt.has("ft");
			this.logfreq = pt.getInt("logfreq");
		}

		@Override
		public void prepare(Map conf, TridentOperationContext context) {
		}

		@Override
		public void cleanup() {
		}

		/**
		 * Processes one tuple: runs the regex over the text, updates the counters and logs
		 * throughput and latency where applicable.
		 */
		@Override
		public void execute(TridentTuple tuple, TridentCollector collector) {
			// NOTE(review): matches() only succeeds when the WHOLE text is three capitals plus
			// a period; find() may have been intended. Left unchanged so that the measured
			// per-tuple work stays the same.
			Matcher m = threeDigitAbbr.matcher(tuple.getString(0));
			if (m.matches()) {
				matches++;
			}

			if (start == 0) {
				start = System.currentTimeMillis();
			}
			received++;

			// logfreq == 0 disables throughput logging (it used to fail with a division by zero)
			if (logfreq != 0 && received % logfreq == 0) {
				logThroughput(System.currentTimeMillis());
			}

			// A timestamp of 0 marks tuples that do not take part in the latency measurement.
			Long sentAt = tuple.getLong(1);
			if (sentAt != null && sentAt != 0) {
				long lat = System.currentTimeMillis() - sentAt;
				LOG.info("Latency {} ms from same machine", lat);
			}
		}

		/** Logs the throughput since the previous call; the first call only sets the baseline. */
		private void logThroughput(long now) {
			if (lastLog != -1) {
				long timeDiff = now - lastLog;
				long elementDiff = received - lastElements;
				double ex = (1000 / (double) timeDiff);
				LOG.info("During the last {} ms, we received {} elements. That's {} elements/second/core",
						timeDiff, elementDiff, elementDiff * ex);
			}
			// (re)init the measurement window
			lastLog = now;
			lastElements = received;
		}
	}

	/**
	 * Builds the topology and runs it. With {@code --local} it is executed in an in-process
	 * cluster for a fixed time, otherwise it is submitted via {@link StormSubmitter}.
	 */
	public static void main(String[] args) throws Exception {
		ParameterTool pt = ParameterTool.fromArgs(args);
		int par = pt.getInt("para");

		TridentTopology topology = new TridentTopology();
		Stream sourceStream = topology.newStream("source", new Generator(pt))
				.parallelismHint(pt.getInt("sourceParallelism"));
		sourceStream.localOrShuffle().each(FIELDS, new Sink(pt), new Fields("dontcare"));

		Config conf = new Config();
		conf.setDebug(false);
		// NOTE: max spout pending is deliberately not set here
		// (formerly: conf.setMaxSpoutPending(pt.getInt("maxPending", 1000))).

		if (!pt.has("local")) {
			conf.setNumWorkers(par);
			StormSubmitter.submitTopologyWithProgressBar(
					"forward-throughput-" + pt.get("name", "no_name"), conf, topology.build());
		} else {
			conf.setMaxTaskParallelism(par);
			LocalCluster cluster = new LocalCluster();
			try {
				cluster.submitTopology("forward-throughput", conf, topology.build());
				Thread.sleep(LOCAL_RUN_MILLIS);
			} finally {
				// Shut the cluster down even if submission fails or the sleep is interrupted.
				cluster.shutdown();
			}
		}
	}
}