package com.zdatainc.rts.storm;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
public class TextFilterBolt extends BaseBasicBolt
{
private static final long serialVersionUID = 42L;
private static Logger LOGGER = Logger.getLogger(TextFilterBolt.class);
public void declareOutputFields(OutputFieldsDeclarer declarer)
{
declarer.declare(new Fields("tweet_id", "tweet_text"));
}
public void execute(Tuple input, BasicOutputCollector collector)
{
LOGGER.debug("removing ugly characters");
Long id = input.getLong(input.fieldIndex("tweet_id"));
String text = input.getString(input.fieldIndex("tweet_text"));
text = text.replaceAll("[^a-zA-Z\\s]", "").trim().toLowerCase();
collector.emit(new Values(id, text));
}
public Map<String, Object> getComponentConfiguration() { return null; }
}