package org.hackreduce.storm.example.twitter;

import static org.hackreduce.storm.HackReduceStormSubmitter.teamPrefix;

import java.util.Map;

import org.hackreduce.storm.HackReduceStormSubmitter;
import org.hackreduce.storm.example.common.Common;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import twitter4j.Status;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.spout.SchemeAsMultiScheme;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;

/**
 * Storm topology that reads the Twitter sample stream from Kafka and logs
 * every retweet it observes (which user retweeted which user).
 *
 * <p>Run with argument {@code local} to launch in an in-process
 * {@link LocalCluster} for two minutes; otherwise the topology is submitted
 * to the shared cluster via {@link HackReduceStormSubmitter}.
 */
public class ReTweeter {

    /**
     * Topology / spout / bolt id. The spout id (after {@code teamPrefix})
     * must be unique across ALL topologies on the cluster; using a single
     * constant keeps the four former copies of this string in sync.
     */
    private static final String TOPOLOGY_NAME = "retweet-logger";

    /**
     * Terminal bolt: receives deserialized {@link Status} tuples, logs the
     * retweeter/retweetee pair for retweets, and acks every tuple.
     * Emits nothing downstream.
     */
    public static class ReTweetLogger extends BaseRichBolt {

        // Bolts are serialized when the topology is submitted; pin the
        // serialization id so recompiles stay wire-compatible.
        private static final long serialVersionUID = 1L;

        private static final Logger LOG = LoggerFactory.getLogger(ReTweetLogger.class);

        // Assigned in prepare() on the worker, after deserialization.
        private OutputCollector collector;

        @Override
        public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
            this.collector = outputCollector;
        }

        @Override
        public void execute(Tuple tuple) {
            // TweetScheme (set on the spout) places the parsed Status at field 0.
            Status status = (Status) tuple.getValue(0);

            if (status.isRetweet()) {
                // isRetweet() == true guarantees getRetweetedStatus() is the
                // original tweet — presumably non-null per twitter4j's contract.
                LOG.info("User {} retweeted {}",
                    status.getUser().getScreenName(),
                    status.getRetweetedStatus().getUser().getScreenName());
            }

            collector.ack(tuple);
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            // This bolt provides no output
        }
    }

    /**
     * Builds and launches the topology.
     *
     * @param args optional; {@code args[0] == "local"} runs a 2-minute
     *             in-process cluster instead of submitting remotely
     * @throws AlreadyAliveException    if a topology with this name is already running
     * @throws InvalidTopologyException if the topology definition is rejected
     * @throws InterruptedException     if the local-mode sleep is interrupted
     */
    public static void main(String[] args)
            throws AlreadyAliveException, InvalidTopologyException, InterruptedException {

        TopologyBuilder builder = new TopologyBuilder();

        // Configuration
        Config config = new Config();

        SpoutConfig spoutConfig = new SpoutConfig(
            Common.getKafkaHosts(),
            "twitter-sample",          // The Kafka topic name
            "/kafkastorm",             // Where to store state in ZK (don't change this)
            teamPrefix(TOPOLOGY_NAME)  // Unique id of this spout. This needs to be unique across ALL topologies.
        );

        // You can parse the tweets yourself if you prefer
        spoutConfig.scheme = new SchemeAsMultiScheme(new TweetScheme());

        // This tells the spout to start at the very beginning of the data stream
        // If you just want to resume where you left off, remove this line
        spoutConfig.forceStartOffsetTime(-2);

        builder.setSpout("tweets", new KafkaSpout(spoutConfig));

        builder.setBolt(TOPOLOGY_NAME, new ReTweetLogger())
            .shuffleGrouping("tweets");

        // Launch
        if (args.length > 0 && args[0].equalsIgnoreCase("local")) {
            LocalCluster lc = new LocalCluster();
            lc.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
            Thread.sleep(120000); // 2 minutes
            lc.shutdown();
        } else {
            HackReduceStormSubmitter.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
        }
    }
}