package org.hackreduce.storm; import backtype.storm.spout.SpoutOutputCollector; import backtype.storm.task.TopologyContext; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseRichSpout; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Values; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; /** * This spout reads data from a CSV file. It is only suitable for testing in local mode */ public class LineSpout extends BaseRichSpout { private static final Logger LOG = LoggerFactory.getLogger(LineSpout.class); private String fileName; private SpoutOutputCollector _collector; private BufferedReader reader; private AtomicLong linesRead; /** * Prepare the spout. This method is called once when the topology is submitted * @param conf * @param context * @param collector */ @Override public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { linesRead = new AtomicLong(0); _collector = collector; try { fileName= (String) conf.get("linespout.file"); reader = new BufferedReader(new FileReader(fileName)); // read and ignore the header if one exists } catch (Exception e) { throw new RuntimeException(e); } } @Override public void deactivate() { try { reader.close(); } catch (IOException e) { LOG.warn("Problem closing file"); } } /** * Storm will call this method repeatedly to pull tuples from the spout */ @Override public void nextTuple() { try { String line = reader.readLine(); if (line != null) { long id = linesRead.incrementAndGet(); _collector.emit(new Values(line), id); } else { System.out.println("Finished reading file, " + linesRead.get() + " lines read"); Thread.sleep(10000); } } catch (Exception e) { e.printStackTrace(); } } /** * Storm will call this method when tuples are acked * @param id */ @Override public void ack(Object id) { } /** * Storm will call this method when tuples fail to process downstream * @param id */ @Override public void fail(Object id) { System.err.println("Failed line number " + id); } /** * Tell storm which fields are emitted by the spout * @param declarer */ @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { // read csv header to get field info declarer.declare(new Fields("line")); } }