package com.github.purplepapa.Storm_Simple_Crawler;
import java.util.Map;
import java.util.Properties;
import orestes.bloomfilter.BloomFilter;
import orestes.bloomfilter.FilterBuilder;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
/**
 * Storm bolt that forwards the first value of every incoming tuple to a Kafka
 * topic and records it in a Redis-backed Bloom filter.
 *
 * <p>The message is always sent to Kafka; the Bloom filter lookup only decides
 * which diagnostic line ("NOT DUP" / "MAY DUP") is printed. The bolt declares
 * no output fields and emits nothing downstream.
 *
 * <p>The Kafka producer and the Bloom filter are created once in
 * {@link #prepare} and reused for every tuple, instead of being rebuilt per
 * message.
 */
public class ForwardToKafkaBolt extends BaseRichBolt {

    private static final long serialVersionUID = 1L;

    /** Redis endpoint and key name backing the URL Bloom filter. */
    private static final String REDIS_HOST = "localhost";
    private static final int REDIS_PORT = 6379;
    private static final String FILTER_NAME = "urlbloomfilter";

    /** Bloom filter sizing: expected element count and false-positive probability. */
    private static final int FILTER_EXPECTED_ELEMENTS = 1000;
    private static final double FILTER_FALSE_POSITIVE_PROBABILITY = 0.01;

    // Plain configuration captured at construction time.
    private final String zkConnect;
    private final String serializerClass;
    private final String topic;

    // Runtime handles, created in prepare(); transient so they are never
    // included when the bolt instance itself is serialized.
    private transient Producer<String, String> producer;
    private transient BloomFilter<String> urlFilter;
    transient OutputCollector _collector;

    /**
     * @param zkConnect       value passed to Kafka as {@code metadata.broker.list}
     *                        (despite the parameter name, it is used as the broker list)
     * @param serializerClass value passed to Kafka as {@code serializer.class}
     * @param topic           Kafka topic every message is published to
     */
    public ForwardToKafkaBolt(String zkConnect, String serializerClass,
            String topic) {
        this.zkConnect = zkConnect;
        this.serializerClass = serializerClass;
        this.topic = topic;
    }

    /**
     * Stores the collector and opens the long-lived resources used by
     * {@link #execute}: the Kafka producer and the Redis-backed Bloom filter.
     */
    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        _collector = collector;

        Properties props = new Properties();
        props.put("metadata.broker.list", zkConnect);
        props.put("serializer.class", serializerClass);
        producer = new Producer<String, String>(new ProducerConfig(props));

        // Build the filter once; constructing it per tuple would repeat the
        // Redis-backed setup for every message.
        urlFilter = new FilterBuilder(FILTER_EXPECTED_ELEMENTS,
                FILTER_FALSE_POSITIVE_PROBABILITY)
                .name(FILTER_NAME).redisBacked(true).redisHost(REDIS_HOST)
                .redisPort(REDIS_PORT).buildBloomFilter();
    }

    /**
     * Publishes the tuple's first value to Kafka, logs whether the Bloom
     * filter has (probably) seen it before, adds it to the filter if not, and
     * acks the tuple.
     *
     * @param input tuple whose value at index 0 is cast to {@code String}
     */
    public void execute(Tuple input) {
        String msg = (String) input.getValue(0);

        KeyedMessage<String, String> data = new KeyedMessage<String, String>(
                topic, msg);
        producer.send(data);
        System.out.println("forward2kafka:"+data);

        // The duplicate check runs after the send, so it is informational only.
        if (!urlFilter.contains(msg)) {
            urlFilter.add(msg);
            System.out.println("FORWARD NOT DUP:" + msg);
        } else {
            System.out.println("FORWARD MAY DUP:" + msg);
        }

        _collector.ack(input);
    }

    /**
     * Closes the Kafka producer when the bolt is shut down.
     * NOTE(review): Storm is understood to call this only on a best-effort
     * basis — confirm against the Storm version in use.
     */
    public void cleanup() {
        if (producer != null) {
            producer.close();
            producer = null;
        }
    }

    /** Declares nothing: this bolt does not emit tuples. */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Intentionally empty.
    }
}