package com.zdatainc.rts.storm;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
/**
 * Storm bolt that removes stop words from the text of a tweet.
 *
 * <p>Despite its name, this class performs no stemming: it reads the
 * {@code tweet_id} and {@code tweet_text} fields of each incoming tuple,
 * deletes every whole-word occurrence of each entry returned by
 * {@code StopWords.getWords()}, and emits the id together with the
 * filtered text.
 */
public class StemmingBolt extends BaseBasicBolt
{
    private static final long serialVersionUID = 42L;
    private static final Logger LOGGER = Logger.getLogger(StemmingBolt.class);

    /**
     * Declares the two output fields of this bolt: {@code tweet_id} and
     * {@code tweet_text}, the same field names it reads from its input.
     *
     * @param declarer the declarer on which the output fields are registered
     */
    public void declareOutputFields(OutputFieldsDeclarer declarer)
    {
        declarer.declare(new Fields("tweet_id", "tweet_text"));
    }

    /**
     * Removes stop words from the incoming tweet text and emits the result.
     *
     * <p>A tuple whose {@code tweet_text} is {@code null} is logged and
     * dropped instead of raising a {@link NullPointerException}.
     *
     * @param input     tuple carrying the {@code tweet_id} and
     *                  {@code tweet_text} fields
     * @param collector collector that receives the {@code (id, text)} pair
     */
    public void execute(Tuple input, BasicOutputCollector collector)
    {
        LOGGER.debug("removing stop words");
        Long id = input.getLong(input.fieldIndex("tweet_id"));
        String text = input.getString(input.fieldIndex("tweet_text"));
        if (text == null)
        {
            // Nothing to filter; bail out rather than fail on replaceAll below.
            LOGGER.warn("skipping tweet " + id + ": tweet_text is null");
            return;
        }
        collector.emit(new Values(id, removeStopWords(text, StopWords.getWords())));
    }

    /**
     * Deletes every whole-word occurrence of each stop word from the text.
     *
     * <p>Words are removed one after another, in list order. Only the word
     * itself is deleted; the whitespace around it is left in place.
     *
     * @param text      the text to filter; must not be {@code null}
     * @param stopWords the words to remove; {@code null} is treated as empty
     * @return the text with all stop words removed
     */
    private static String removeStopWords(String text, List<String> stopWords)
    {
        if (stopWords == null)
        {
            return text;
        }
        String filtered = text;
        for (String word : stopWords)
        {
            // A null entry would otherwise be concatenated as the literal
            // "null" and strip that word from the tweet; an empty entry
            // can never remove anything.
            if (word == null || word.isEmpty())
            {
                continue;
            }
            // Quote the word so that regex metacharacters in it are matched
            // literally instead of being interpreted as pattern syntax.
            filtered = filtered.replaceAll("\\b" + Pattern.quote(word) + "\\b", "");
        }
        return filtered;
    }

    /**
     * Returns {@code null}: this bolt supplies no component-specific
     * configuration.
     *
     * @return always {@code null}
     */
    public Map<String, Object> getComponentConfiguration() { return null; }
}