package org.hackreduce.storm.vmc.bolts;
import java.net.URL;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.fetcher.FeedFetcher;
import com.sun.syndication.fetcher.impl.HttpURLFeedFetcher;
import org.hackreduce.storm.vmc.common.BaseComponent;
/**
 *
 * Based on http://www.datasalt.com/2012/01/real-time-feed-processing-with-storm/
 * with a few differences:
 * 1) Outputs the city and the title of the Craigslist ad/entry.
 * 2) Updated to use Storm 0.8.2 (the referenced project uses 0.6).
 *
 */
public class CraigsListFeedBolt extends BaseComponent implements IRichBolt
{
    private OutputCollector collector;

    /**
     * Single fetcher reused for every incoming tuple. The original code
     * allocated a new HttpURLFeedFetcher per execute() call, which is
     * wasteful; execute() is invoked single-threaded per bolt instance,
     * so one instance created in prepare() is sufficient.
     * Marked transient: bolts are serialized when the topology is
     * submitted, and the fetcher is rebuilt in prepare() on the worker.
     */
    private transient FeedFetcher feedFetcher;

    /**
     * Stores the collector and topology configuration and builds the feed
     * fetcher used by execute().
     *
     * @param conf      topology configuration (raw Map per IRichBolt contract)
     * @param context   this task's topology context (unused)
     * @param collector collector used to emit, ack and fail tuples
     */
    @Override
    public void prepare(Map conf, TopologyContext context, OutputCollector collector)
    {
        this.collector = collector;
        this.conf = conf; // conf field inherited from BaseComponent
        this.feedFetcher = new HttpURLFeedFetcher();
    }

    /**
     * Fetches the RSS/Atom feed named by the tuple's "feed" field and emits
     * one tuple per feed entry: (city, title, link, groupBy), where
     * "groupBy" duplicates the city so downstream components can use a
     * fields grouping on it.
     *
     * On any fetch/parse failure the input tuple is failed so the spout can
     * replay it. Was "catch (Throwable)" — narrowed to Exception so JVM
     * Errors (OutOfMemoryError etc.) are not swallowed.
     *
     * NOTE(review): emits are not anchored to the input tuple, so acking by
     * downstream bolts does not feed back into this tuple's reliability
     * chain — confirm whether that is intentional for this topology.
     */
    @Override
    public void execute(Tuple input)
    {
        // Tick tuples carry no feed URL; skip them.
        // NOTE(review): Storm convention is usually to ack tick tuples as
        // well — confirm against this topology's reliability settings.
        if ("__tick".equals(input.getSourceStreamId())) return;

        String feedUrl = input.getStringByField("feed");
        try
        {
            SyndFeed feed = feedFetcher.retrieveFeed(new URL(feedUrl));
            // City is loop-invariant; read it once instead of per entry.
            String city = input.getString(0);
            for (Object entry : feed.getEntries())
            {
                SyndEntry se = (SyndEntry) entry;
                this.collector.emit(new Values(city, se.getTitle(), se.getLink(), city));
            }
            this.collector.ack(input);
        }
        catch (Exception e)
        {
            System.err.println("Problem loading [" + feedUrl + "].");
            e.printStackTrace();
            this.collector.fail(input);
        }
    }

    /** No resources to release; the fetcher holds no open connections between calls. */
    @Override
    public void cleanup() {}

    /**
     * Declares the output schema: city, entry title, entry link, and a
     * "groupBy" field (a copy of city) for downstream fields groupings.
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer)
    {
        declarer.declare(new Fields("city", "title", "link", "groupBy"));
    }
}