/**
* SplitSentence is a bolt that splits a sentence, given as a string, into words,
* using runs of punctuation characters and whitespace as delimiters.
*
*
* @author Tarun Sharma
* @version 1.0
* @see <a href="http://www.dream-lab.in/">DREAM:Lab</a>
*
* Copyright 2014 DREAM:Lab, Indian Institute of Science, 2014
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package mypackage.bolts;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
 * Bolt that breaks an incoming sentence into words and emits one tuple per word.
 *
 * <p>Field 0 of each input tuple is read as the sentence. The sentence is split on runs of
 * punctuation and whitespace characters (see {@link #punct}); every non-empty token is
 * lower-cased, suffixed with {@code "<"}, and emitted anchored to the input tuple on the
 * single output field {@code "word"}. The input tuple is acked after all of its words have
 * been emitted.
 */
public class SplitSentence extends BaseRichBolt {

    private static final long serialVersionUID = 8026109946114772620L;

    private static final Logger LOG = LoggerFactory.getLogger(SplitSentence.class);

    /** Collector received in {@link #prepare}; used to emit words and to ack input tuples. */
    OutputCollector collector;

    /** Delimiter regex: one or more punctuation or whitespace characters. */
    String punct = "(\\p{Punct}|\\s)+";

    /**
     * Compiled form of {@link #punct}, cached so the regex is not recompiled for every tuple.
     * Transient because it is derived state that can always be rebuilt from {@code punct}.
     */
    private transient Pattern delimiter;

    /**
     * Stores the collector for later use by {@link #execute}.
     *
     * @param stormConf topology configuration (not used by this bolt)
     * @param context   topology context (not used by this bolt)
     * @param collector collector used to emit and ack tuples
     */
    @Override
    @SuppressWarnings("rawtypes") // the overridden method declares a raw Map
    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Splits the sentence held in field 0 of {@code input} and emits each word.
     *
     * <p>A {@code null} sentence produces no output; the tuple is still acked so that it is
     * not left pending.
     *
     * @param input tuple whose first field is the sentence to split
     */
    @Override
    public void execute(Tuple input) {
        String sentence = input.getString(0);
        LOG.info("Inside Split anchors to ids is {}", input.getMessageId().getAnchorsToIds());
        if (sentence != null) {
            for (String word : delimiter().split(sentence)) {
                // A sentence that starts with a delimiter yields a leading empty token.
                if (!word.isEmpty()) {
                    // Locale.ROOT keeps the casing independent of the JVM's default locale.
                    // NOTE(review): the trailing "<" is kept from the original code; it looks
                    // like a marker expected by a downstream consumer -- confirm.
                    collector.emit(input, new Values(word.toLowerCase(Locale.ROOT) + "<"));
                }
            }
        }
        collector.ack(input);
    }

    /**
     * Declares the single output field, {@code "word"}.
     *
     * @param declarer declarer on which the output schema is registered
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }

    /** Returns the compiled delimiter, (re)compiling it if {@link #punct} has changed. */
    private Pattern delimiter() {
        if (delimiter == null || !delimiter.pattern().equals(punct)) {
            delimiter = Pattern.compile(punct);
        }
        return delimiter;
    }
}