package com.realtimecep.pilots.analytics.sns.bolts;

import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Twitter Data Extractor Bolt.
 *
 * <p>Reads the {@code "tweet"} field of each incoming tuple, splits it into
 * whitespace-separated tokens, lower-cases them and emits every token that
 * passes {@link #filter(String)} as a single-field {@code "word"} tuple.
 *
 * @author <a href="mailto:iamtedwon@gmail.com">Ted Won</a>
 * @version 0.1.0
 */
public class TwitterDataExtractorBolt extends BaseBasicBolt {

    private static final long serialVersionUID = -3025639777071957758L;

    /** Static so the logger is not part of the bolt's serialized instance state. */
    private static final Logger logger = LoggerFactory.getLogger(TwitterDataExtractorBolt.class);

    /** Token separator: any run of whitespace (spaces, tabs, line breaks). */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * No initialization is required for this bolt.
     *
     * @param conf    topology configuration (unused)
     * @param context topology context (unused)
     */
    @Override
    @SuppressWarnings("rawtypes") // the overridden method declares a raw Map
    public void prepare(Map conf, TopologyContext context) {
        // Intentionally empty.
    }

    /**
     * Splits the tweet text into words and emits each accepted word.
     *
     * @param input     tuple carrying the tweet text in its {@code "tweet"} field
     * @param collector collector that receives one {@code Values(word)} per accepted word
     */
    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String tweet = (String) input.getValueByField("tweet");
        if (tweet == null) {
            // Nothing to tokenize; skip instead of failing with a NullPointerException.
            logger.debug("Skipping tuple without a tweet value");
            return;
        }

        for (String token : WHITESPACE.split(tweet)) {
            // Locale.ROOT keeps lower-casing independent of the JVM default locale,
            // so the comparison against "rt" in filter() behaves the same everywhere.
            String word = token.trim().toLowerCase(Locale.ROOT);
            if (!word.isEmpty() && filter(word)) {
                collector.emit(new Values(word));
            }
        }
    }

    /**
     * No resources are held by this bolt, so there is nothing to release.
     */
    @Override
    public void cleanup() {
        // Intentionally empty.
    }

    /**
     * Declares the single output field {@code "word"}.
     *
     * @param declarer declarer used to register the output fields
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }

    /**
     * Decides whether a word should be emitted.
     *
     * <p>A word is rejected when it equals {@code "rt"}, equals {@code "..."},
     * or is exactly one character long.
     *
     * @param word lower-cased, non-empty word
     * @return {@code true} if the word should be kept (emitted),
     *         {@code false} if it should be dropped
     */
    private boolean filter(String word) {
        boolean rejected = "rt".equals(word)
                || "...".equals(word)
                || word.length() == 1;
        return !rejected;
    }
}