package com.realtimecep.pilots.analytics.sns.trident;

import backtype.storm.tuple.Values;
import java.util.Locale;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;

/**
 * HashTag Splitter Function Class.
 * <p/>
 * Reads the {@code "tweet"} field of each incoming tuple, splits it into
 * whitespace-separated tokens, lower-cases every token and emits each token
 * that passes {@link #filter(String)} as a single-value tuple.
 * <p/>
 * NOTE(review): despite the class name, no check for a leading {@code '#'} is
 * performed here; presumably hashtag selection happens elsewhere in the
 * topology -- confirm.
 *
 * @author <a href="iamtedwon@gmail.com">Ted Won</a>
 * @version 0.1.0
 */
public class HashTagSplitter extends BaseFunction {

    private static final Logger logger = LoggerFactory.getLogger(HashTagSplitter.class);

    private static final long serialVersionUID = 4177035756923453986L;

    /** Token delimiter: one or more whitespace characters (space, tab, newline, ...). */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /**
     * Splits the tweet text into normalized words and emits every accepted word.
     *
     * @param input     tuple carrying the tweet text in its {@code "tweet"} field
     * @param collector collector that receives one {@link Values} per accepted word
     */
    @Override
    public void execute(TridentTuple input, TridentCollector collector) {
        String tweet = (String) input.getValueByField("tweet");
        if (tweet == null) {
            // Nothing to split; skip instead of failing the batch with an NPE.
            logger.debug("Skipping tuple without tweet text");
            return;
        }

        for (String token : WHITESPACE.split(tweet)) {
            // Locale.ROOT keeps lower-casing independent of the JVM default locale
            // (e.g. Turkish 'I' -> dotless 'i' would defeat the "rt" check below).
            String word = token.trim().toLowerCase(Locale.ROOT);
            // A leading delimiter yields an empty first token; drop it here.
            if (!word.isEmpty() && filter(word)) {
                collector.emit(new Values(word));
            }
        }
    }

    /**
     * Decides whether a normalized word should be emitted.
     *
     * @param word lower-cased, trimmed, non-empty word
     * @return {@code true} if the word should be kept; {@code false} if it is
     *         {@code "rt"}, {@code "..."} or a single character
     */
    private boolean filter(String word) {
        return !("rt".equals(word) || "...".equals(word) || word.length() == 1);
    }
}