package com.produban.openbus.analysis; import backtype.storm.Config; import backtype.storm.StormSubmitter; import backtype.storm.generated.StormTopology; import backtype.storm.tuple.Fields; import com.lexicalscope.jewel.cli.ArgumentValidationException; import com.lexicalscope.jewel.cli.CliFactory; import com.lexicalscope.jewel.cli.Option; import com.produban.openbus.persistence.ElasticSearchIndexer; import com.produban.openbus.util.Common; import storm.trident.Stream; import storm.trident.TridentTopology; import javax.ws.rs.OPTIONS; import java.util.ArrayList; import java.util.List; /** * A Trident topology for processing a stream of Tweets */ public class TweetsTopology { public static StormTopology buildTopology (TweetsTopologyOptions options){ TridentTopology topology = new TridentTopology(); BrokerSpout kafkaTweetSpout = new BrokerSpout( options.getKafkaTopic(), Common.join(options.getZookeeper(), ","), options.getKafkaClientID(), options.isForceFromStart()); ElasticSearchIndexer esIndexer = new ElasticSearchIndexer(options.getElasticSearchClusterName(), options.getElasticSearchIndex(), options.getElasticSearchNodes()); List<String> tweetFields = new ArrayList<>(); tweetFields.add("tweetId"); tweetFields.add("rawDate"); tweetFields.add("date"); tweetFields.add("text"); tweetFields.add("lang"); tweetFields.add("retweetCount"); tweetFields.add("longitude"); tweetFields.add("latitude"); tweetFields.add("userFollowerCount"); tweetFields.add("userLocation"); tweetFields.add("userName"); tweetFields.add("userId"); tweetFields.add("userImgUrl"); tweetFields.add("urls"); tweetFields.add("mentionedUsers"); tweetFields.add("hashtags"); Stream stream = topology.newStream("spout", kafkaTweetSpout.getOpaquePartitionedTridentSpout()) //topology.newStream("spout", kafkaTweetSpout.getPartitionedTridentSpout()) .each(new Fields("bytes"), new TweetJsonDecoder(), new Fields(tweetFields)) .each(new Fields("text"), new KeywordsFilter(options.getFilterKeyWords())) //do something interesting here .each(new Fields(tweetFields), esIndexer, new Fields("indexed")); return topology.build(); } public static void main(String[] args) throws Exception { //parse topology arguments: TweetsTopologyOptions appOptions = null; try { appOptions = CliFactory.parseArguments(TweetsTopologyOptions.class, args); } catch(ArgumentValidationException e) { System.out.println(e.getMessage()); System.exit(-1); } Config stormConfig = new Config(); stormConfig.setNumWorkers(appOptions.getStormNumWorkers()); StormSubmitter.submitTopology(appOptions.getTopologyName(), stormConfig, buildTopology(appOptions)); } /* Definition of topology arguments (this uses JewelCLI library) */ public interface TweetsTopologyOptions { @Option(defaultValue = "tweetsTopology") String getTopologyName(); @Option List<String> getZookeeper(); @Option List<String> getElasticSearchNodes(); @Option String getElasticSearchClusterName(); @Option String getElasticSearchIndex(); @Option String getKafkaTopic(); @Option(defaultValue = "3") int getStormNumWorkers(); @Option(defaultValue = "tweetsTopology") String getKafkaClientID(); @Option List<String> getFilterKeyWords(); @Option boolean isForceFromStart(); @Option(shortName = "h", helpRequest = true) boolean getHelp(); } }