package com.realtimecep.storm.starter.transactional;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.coordination.BatchOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.testing.MemoryTransactionalSpout;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.topology.base.BaseTransactionalBolt;
import backtype.storm.transactional.ICommitter;
import backtype.storm.transactional.TransactionAttempt;
import backtype.storm.transactional.TransactionalTopologyBuilder;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* This class defines a more involved transactional topology than TransactionalGlobalCount. This topology
* processes a stream of words and produces two outputs:
*
* 1. A count for each word (stored in a database)
* 2. The number of words in every bucket of BUCKET_SIZE counts. With BUCKET_SIZE = 3 it stores in the database
* how many words have appeared 0-2 times, how many have appeared 3-5 times, and so on.
* Words are grouped into buckets by their count range, so the bucket contents give a statistical summary of the
* processed data; this can be used for top-N counts.
*
* A batch of words can decrement the counts of some buckets and increment others, because words move between
* buckets as their counts accumulate.
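*
* For example, with BUCKET_SIZE = 3, a word whose count grows from 2 to 4 within a batch leaves bucket 0
* (counts 0-2) and enters bucket 1 (counts 3-5): bucket 1 is incremented and bucket 0 is decremented.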
*
* Sample debug output for a tuple emitted by the spout. The tuple body is [TransactionAttempt, word], and the
* TransactionAttempt prints as TxID:AttemptID (here 1:7101600008655646784):
*
* source: spout:7, stream: default, id: {}, [1:7101600008655646784, dog]
*
*/
public class MyTransactionalWords {
public static class CountValue {
Integer prev_count = null;
int count = 0;
// txid of the transaction that last updated this value (used to make replayed batches idempotent)
BigInteger txid = null;
@Override
public String toString() {
return "CountValue{" +
"prev_count=" + prev_count +
", count=" + count +
", txid=" + txid +
'}';
}
}
public static class BucketValue {
int count = 0;
BigInteger txid;
@Override
public String toString() {
return "BucketValue{" +
"count=" + count +
", txid=" + txid +
'}';
}
}
public static final int BUCKET_SIZE = 3;
public static Map<String, CountValue> COUNT_DATABASE = new HashMap<String, CountValue>();
public static Map<Integer, BucketValue> BUCKET_DATABASE = new HashMap<Integer, BucketValue>();
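// These in-memory maps stand in for an external key/value store. Storing the txid alongside each value is
// what makes the updates idempotent: a replayed batch arrives with the same txid, so the committer bolts can
// detect it and skip the update (see KeyedCountUpdater.finishBatch and BucketCountUpdater.finishBatch).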
public static final int PARTITION_TAKE_PER_BATCH = 3;
public static final Map<Integer, List<List<Object>>> DATA = new HashMap<Integer, List<List<Object>>>() {{
put(0, new ArrayList<List<Object>>() {{
add(new Values("cat"));
add(new Values("dog"));
add(new Values("chicken"));
add(new Values("tedwon"));
add(new Values("dog"));
add(new Values("apple"));
}});
put(1, new ArrayList<List<Object>>() {{
add(new Values("cat"));
add(new Values("dog"));
add(new Values("apple"));
add(new Values("banana"));
}});
put(2, new ArrayList<List<Object>>() {{
add(new Values("cat"));
add(new Values("cat"));
add(new Values("cat"));
add(new Values("cat"));
add(new Values("cat"));
add(new Values("dog"));
add(new Values("dog"));
add(new Values("dog"));
add(new Values("dog"));
}});
}};
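// MemoryTransactionalSpout replays these partitions deterministically, taking at most
// PARTITION_TAKE_PER_BATCH tuples from each partition per batch, so a failed batch is retried with
// exactly the same tuples.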
public static class KeyedCountUpdater extends BaseTransactionalBolt implements ICommitter {
Map<String, Integer> _counts = new HashMap<String, Integer>();
BatchOutputCollector _collector;
TransactionAttempt _id;
int _count = 0;
@Override
public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, TransactionAttempt id) {
_collector = collector;
_id = id;
}
@Override
public void execute(Tuple tuple) {
// System.out.println(tuple);
String key = tuple.getString(1);
Integer curr = _counts.get(key);
if(curr==null) curr = 0;
// word count
_counts.put(key, curr + 1);
// System.out.println(_counts);
// Utils.sleep(1000);
}
@Override
public void finishBatch() {
// finishBatch is called once per batch attempt, even for batches in which execute() never ran
// System.out.println("finishBatch");
// flush this batch's word counts into the external COUNT_DATABASE
for(String key: _counts.keySet()) {
CountValue val = COUNT_DATABASE.get(key);
CountValue newVal;
if(val==null || !val.txid.equals(_id.getTransactionId())) {
// first write for this key, or the stored value came from a different transaction
newVal = new CountValue();
newVal.txid = _id.getTransactionId();
if(val!=null) {
newVal.prev_count = val.count;
newVal.count = val.count;
}
newVal.count = newVal.count + _counts.get(key);
COUNT_DATABASE.put(key, newVal);
// System.out.println(key + " : " + newVal);
} else {
// same transaction replayed: the database already reflects this batch, so reuse the stored value
newVal = val;
}
_collector.emit(new Values(_id, key, newVal.count, newVal.prev_count));
// System.out.println(COUNT_DATABASE);
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
// prev-count: the count this key had after the previous transaction
declarer.declare(new Fields("id", "key", "count", "prev-count"));
}
}
public static class Bucketize extends BaseBasicBolt {
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
// System.out.println(tuple);
TransactionAttempt attempt = (TransactionAttempt) tuple.getValue(0);
int curr = tuple.getInteger(2);
Integer prev = tuple.getInteger(3);
int currBucket = curr / BUCKET_SIZE;
Integer prevBucket = null;
if(prev!=null) {
prevBucket = prev / BUCKET_SIZE;
}
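// Emit +1 for the word's current bucket; if the word moved out of a different bucket, also emit -1 for it.
// E.g. with BUCKET_SIZE = 3: curr=4, prev=2 gives currBucket=1, prevBucket=0, so this emits
// (attempt, 1, +1) and (attempt, 0, -1).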
if(prevBucket==null) {
Values tuple1 = new Values(attempt, currBucket, 1);
collector.emit(tuple1);
// System.out.println("prevBucket==null :" + tuple1);
} else if(currBucket != prevBucket) {
Values tuple1 = new Values(attempt, currBucket, 1);
collector.emit(tuple1);
// System.out.println("currBucket != prevBucket :" + tuple1);
Values tuple2 = new Values(attempt, prevBucket, -1);
collector.emit(tuple2);
// System.out.println("currBucket != prevBucket :" + tuple2);
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("attempt", "bucket", "delta"));
}
}
public static class BucketCountUpdater extends BaseTransactionalBolt {
Map<Integer, Integer> _accum = new HashMap<Integer, Integer>();
BatchOutputCollector _collector;
TransactionAttempt _attempt;
int _count = 0;
@Override
public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, TransactionAttempt attempt) {
_collector = collector;
_attempt = attempt;
}
@Override
public void execute(Tuple tuple) {
Integer bucket = tuple.getInteger(1);
Integer delta = tuple.getInteger(2);
Integer curr = _accum.get(bucket);
if(curr==null) curr = 0;
_accum.put(bucket, curr + delta);
}
@Override
public void finishBatch() {
for(Integer bucket: _accum.keySet()) {
BucketValue currVal = BUCKET_DATABASE.get(bucket);
BucketValue newVal;
if(currVal==null || !currVal.txid.equals(_attempt.getTransactionId())) {
newVal = new BucketValue();
newVal.txid = _attempt.getTransactionId();
newVal.count = _accum.get(bucket);
if(currVal!=null) newVal.count += currVal.count;
BUCKET_DATABASE.put(bucket, newVal);
} else {
newVal = currVal;
}
Values tuple = new Values(_attempt, bucket, newVal.count);
_collector.emit(tuple);
// System.out.println(tuple);
// System.out.println(BUCKET_DATABASE);
}
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("id", "bucket", "count"));
}
}
public static void main(String[] args) throws Exception {
MemoryTransactionalSpout spout = new MemoryTransactionalSpout(DATA, new Fields("word"), PARTITION_TAKE_PER_BATCH);
TransactionalTopologyBuilder builder = new TransactionalTopologyBuilder("top-n-words", "spout", spout, 1);
// simple word count, partitioned by word
builder.setBolt("count", new KeyedCountUpdater(), 5)
.fieldsGrouping("spout", new Fields("word"));
// turn each word's current/previous count into bucket deltas (+1 for the new bucket, -1 for the old)
builder.setBolt("bucketize", new Bucketize())
.noneGrouping("count");
builder.setBolt("buckets", new BucketCountUpdater(), 5)
.fieldsGrouping("bucketize", new Fields("bucket"));
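// fieldsGrouping on "bucket" routes all deltas for a given bucket to the same BucketCountUpdater task,
// so the read-modify-write of BUCKET_DATABASE in finishBatch never races on a key.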
LocalCluster cluster = new LocalCluster();
Config config = new Config();
config.setDebug(true);
config.setMaxSpoutPending(3);
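// allow up to 3 transactions in the pipeline at once; Storm still commits them strictly in txid order,
// which the txid-based idempotence above depends on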
cluster.submitTopology("top-n-topology", config, builder.buildTopology());
// Thread.sleep(3000);
// cluster.shutdown();
}
}