package com.alibaba.jstorm.transactional;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.rocksdb.Options;
import org.rocksdb.util.SizeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.generated.StormTopology;
import backtype.storm.serialization.KryoTupleDeserializer;
import backtype.storm.serialization.KryoTupleSerializer;
import backtype.storm.task.TopologyContext;
import backtype.storm.tuple.Tuple;

import com.alibaba.jstorm.cache.RocksDBCache;
import com.alibaba.jstorm.client.ConfigExtension;
import com.alibaba.jstorm.utils.JStormUtils;

/**
 * Per-task cache for tuples belonging to transaction batches that cannot be
 * processed yet (their batch id is ahead of the last successful batch of the
 * group). Tuples are buffered in memory per batch; once a batch's in-memory
 * buffer exceeds {@code maxFlushSize} bytes, the buffered entries are spilled
 * into a task-local RocksDB instance (rooted under the worker dir) to bound
 * heap usage. The RocksDB directory is reset on construction, so cached data
 * does not survive a restart.
 */
public class BatchCache extends RocksDBCache {
    public static Logger LOG = LoggerFactory.getLogger(BatchCache.class);

    /**
     * Tuples of one in-flight batch. Serialized tuples are appended to
     * {@link #tuples} until the accumulated byte size exceeds
     * {@code maxFlushSize}; the buffer is then spilled into RocksDB under keys
     * {@code cacheKeyPrefix + index}. A batch becomes inactive once its tuples
     * have been read back ({@link #getTuples()}) or discarded
     * ({@link #removeTuples()}); further adds are rejected.
     */
    protected class PendingBatch {
        public String cacheKeyPrefix;
        // Number of entries spilled to RocksDB so far (write cursor).
        public volatile int cacheNum = 0;
        // Next spilled entry to read back (read cursor).
        public int cacheReadIndex = 0;
        public List<byte[]> tuples = new ArrayList<byte[]>();

        // Byte size of the current in-memory buffer.
        private int cacheSize = 0;
        private final Object lock = new Object();
        // Flips to false once the batch has been drained or removed.
        private boolean isActive = true;

        /**
         * Buffers one serialized tuple; when the buffer grows past
         * {@code maxFlushSize} bytes, spills the whole buffer to RocksDB.
         * Must be called with {@code lock} held.
         */
        public void addData(byte[] data) {
            tuples.add(data);
            cacheSize += data.length;
            if (cacheSize > maxFlushSize) {
                for (byte[] cacheData : tuples) {
                    put(cacheKeyPrefix + String.valueOf(cacheNum), cacheData);
                    cacheNum++;
                }
                tuples = new ArrayList<byte[]>();
                cacheSize = 0;
            }
        }

        /**
         * Adds an already-serialized tuple to this batch.
         *
         * @return true if added; false if the batch is no longer active
         */
        public boolean addTuples(byte[] data) {
            synchronized (lock) {
                if (isActive) {
                    addData(data);
                    return true;
                } else {
                    return false;
                }
            }
        }

        /**
         * Serializes and adds a tuple to this batch.
         *
         * @return true if added; false if the batch is no longer active
         */
        public boolean addTuples(KryoTupleSerializer serializer, Tuple tuple) {
            // Serialize outside the lock to keep the critical section short.
            byte[] data = serializer.serialize(tuple);
            synchronized (lock) {
                if (isActive) {
                    addData(data);
                    return true;
                } else {
                    return false;
                }
            }
        }

        /**
         * Drains all tuples of this batch — spilled RocksDB entries first (which
         * are removed as they are read), then the in-memory tail — and marks the
         * batch inactive. Returns an empty list if the batch was already drained
         * or removed.
         */
        public List<byte[]> getTuples() {
            List<byte[]> cacheBatch = new ArrayList<byte[]>();
            synchronized (lock) {
                if (isActive) {
                    for (; cacheReadIndex < cacheNum; cacheReadIndex++) {
                        String key = cacheKeyPrefix + String.valueOf(cacheReadIndex);
                        cacheBatch.add((byte[]) get(key));
                        remove(key);
                    }
                    cacheBatch.addAll(tuples);
                    tuples = new ArrayList<byte[]>();
                    isActive = false;
                } else {
                    LOG.warn("Try to get cache tuples when cache has been read or removed!");
                }
            }
            return cacheBatch;
        }

        /**
         * Discards all tuples of this batch, both spilled and in-memory, and
         * marks the batch inactive.
         */
        public void removeTuples() {
            synchronized (lock) {
                for (; cacheReadIndex < cacheNum; cacheReadIndex++) {
                    String key = cacheKeyPrefix + String.valueOf(cacheReadIndex);
                    remove(key);
                }
                tuples = new ArrayList<byte[]>();
                isActive = false;
            }
        }

        @Override
        public String toString() {
            return "cacheNum: " + cacheNum + ", Pending tuple size:" + (tuples != null ? tuples.size() : 0);
        }
    }

    protected Map stormConf;
    protected String workerDir;
    protected String cacheDir;
    protected int taskId;
    protected boolean isExactlyOnceMode;

    // groupId -> (batchId -> pending batch)
    protected Map<Integer, Map<Long, PendingBatch>> pendingBatches;
    // Group ids in round-robin order for getNextPendingBatch.
    protected List<Integer> pendingBatchGroups;
    protected int pendingBatchGroupIndex = 0;

    // Byte threshold above which a batch's in-memory buffer is spilled to RocksDB.
    protected int maxFlushSize;

    protected KryoTupleSerializer serializer;
    protected KryoTupleDeserializer deserializer;

    /**
     * Creates the cache for one task, registering a pending-batch map per
     * upstream spout group and initializing a fresh (reset) RocksDB instance
     * under the worker dir.
     *
     * @throws RuntimeException if the RocksDB directory or database cannot be initialized
     */
    public BatchCache(TopologyContext context, Set<String> upstreamSpoutIds, StormTopology sysTopology) {
        this.stormConf = context.getStormConf();
        this.workerDir = context.getWorkerIdDir();
        this.taskId = context.getThisTaskId();
        this.isExactlyOnceMode = JStormUtils.parseBoolean(stormConf.get("transaction.exactly.once.mode"), true);
        this.cacheDir = this.workerDir + "/transactionCache/task-" + taskId;

        this.pendingBatches = new HashMap<Integer, Map<Long, PendingBatch>>();
        this.pendingBatchGroups = new ArrayList<Integer>();
        for (String spoutId : upstreamSpoutIds) {
            int id = TransactionCommon.groupIndex(context.getRawTopology(), spoutId);
            pendingBatches.put(id, new HashMap<Long, PendingBatch>());
            pendingBatchGroups.add(id);
        }
        this.maxFlushSize = ConfigExtension.getTransactionCacheBatchFlushSize(stormConf);

        Options rocksDbOpt = new Options();
        rocksDbOpt.setCreateMissingColumnFamilies(true).setCreateIfMissing(true);
        long bufferSize = ConfigExtension.getTransactionCacheBlockSize(stormConf) != null ?
                ConfigExtension.getTransactionCacheBlockSize(stormConf) : (1 * SizeUnit.GB);
        rocksDbOpt.setWriteBufferSize(bufferSize);
        int maxBufferNum = ConfigExtension.getTransactionMaxCacheBlockNum(stormConf) != null ?
                ConfigExtension.getTransactionMaxCacheBlockNum(stormConf) : 3;
        rocksDbOpt.setMaxWriteBufferNumber(maxBufferNum);

        try {
            Map<Object, Object> conf = new HashMap<Object, Object>();
            conf.put(ROCKSDB_ROOT_DIR, cacheDir);
            // Wipe any stale cache data from a previous run.
            conf.put(ROCKSDB_RESET, true);
            initDir(conf);
            initDb(null, rocksDbOpt);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        serializer = new KryoTupleSerializer(stormConf, sysTopology);
        deserializer = new KryoTupleDeserializer(stormConf, context, sysTopology);
    }

    public boolean isExactlyOnceMode() {
        return isExactlyOnceMode;
    }

    /**
     * Looks up the pending batch for a batch-group id, optionally creating it
     * (only if the batch really is pending) or removing it from the map.
     * Returns null for unknown groups or when no batch exists and none was
     * created.
     */
    private synchronized PendingBatch getPendingBatch(BatchGroupId batchGroupId, boolean createIfAbsent, boolean remove,
                                                     Map<Integer, Long> lastSuccessfulBatch) {
        Map<Long, PendingBatch> batches = pendingBatches.get(batchGroupId.groupId);
        if (batches == null) {
            // Unknown group id; original code would NPE here.
            return null;
        }
        PendingBatch batch = batches.get(batchGroupId.batchId);
        if (batch == null && createIfAbsent && isPendingBatch(batchGroupId, lastSuccessfulBatch)) {
            batch = new PendingBatch();
            // Separator prevents key collisions, e.g. group=1/batch=23 vs group=12/batch=3
            // would otherwise both map to prefix "123".
            batch.cacheKeyPrefix = String.valueOf(batchGroupId.groupId) + "-" + String.valueOf(batchGroupId.batchId);
            batches.put(batchGroupId.batchId, batch);
        } else if (batch != null) {
            if (remove) {
                batches.remove(batchGroupId.batchId);
            }
        }
        return batch;
    }

    /**
     * Caches an already-serialized tuple for a pending batch.
     *
     * @return true if cached; false if the batch is not pending (caller should
     *         process the tuple immediately)
     */
    public boolean cachePendingBatch(BatchGroupId batchGroupId, byte[] data, Map<Integer, Long> lastSuccessfulBatch) {
        PendingBatch batch = getPendingBatch(batchGroupId, true, false, lastSuccessfulBatch);
        if (batch != null) {
            return batch.addTuples(data);
        } else {
            return false;
        }
    }

    /**
     * Serializes and caches a tuple for a pending batch.
     *
     * @return true if cached; false if the batch is not pending
     */
    public boolean cachePendingBatch(BatchGroupId batchGroupId, Tuple tuple, Map<Integer, Long> lastSuccessfulBatch) {
        PendingBatch batch = getPendingBatch(batchGroupId, true, false, lastSuccessfulBatch);
        if (batch != null) {
            byte[] data = serializer.serialize(tuple);
            return batch.addTuples(data);
        } else {
            return false;
        }
    }

    /**
     * A batch is "pending" (must be cached rather than processed) when, in
     * exactly-once mode, its batch id is more than one ahead of the group's
     * last successful batch. The init batch is never pending.
     */
    public boolean isPendingBatch(BatchGroupId batchGroupId, Map<Integer, Long> lastSuccessfulBatch) {
        boolean ret = false;
        if (batchGroupId.batchId == TransactionCommon.INIT_BATCH_ID) {
            return ret;
        }

        if (isExactlyOnceMode) {
            // If it is not the same group with current in progress batch, just put incoming tuple into pending queue
            Long successBatchId = lastSuccessfulBatch.get(batchGroupId.groupId);
            // Null guard: no record for this group means we cannot classify it as
            // pending (original code would NPE on unboxing).
            if (successBatchId != null && batchGroupId.batchId > successBatchId + 1) {
                ret = true;
            }
        }
        return ret;
    }

    /**
     * Deserializes and returns the tuples of the next ready pending batch, or
     * null if no batch is ready.
     */
    public List<Tuple> getNextPendingTuples(Map<Integer, Long> lastSuccessfulBatch) {
        List<Tuple> ret = null;
        List<byte[]> protoBatch = getNextPendingBatch(lastSuccessfulBatch);
        if (protoBatch != null) {
            ret = new ArrayList<Tuple>();
            for (byte[] data : protoBatch) {
                ret.add(deserializer.deserialize(data));
            }
        }
        return ret;
    }

    /**
     * Round-robins over the spout groups looking for a cached batch whose id is
     * exactly one past the group's last successful batch. If found, the batch is
     * removed from the pending map and its serialized tuples returned;
     * otherwise returns null.
     */
    public List<byte[]> getNextPendingBatch(Map<Integer, Long> lastSuccessfulBatch) {
        List<byte[]> ret = null;
        for (int i = 0; i < pendingBatchGroups.size(); i++) {
            int groupId = pendingBatchGroups.get(pendingBatchGroupIndex);
            pendingBatchGroupIndex = (pendingBatchGroupIndex + 1) % pendingBatchGroups.size();
            Long lastSuccess = lastSuccessfulBatch.get(groupId);
            if (lastSuccess == null) {
                // No record for this group yet; original code would NPE on unboxing.
                continue;
            }
            BatchGroupId batchGroupId = new BatchGroupId(groupId, lastSuccess + 1);
            PendingBatch batch = getPendingBatch(batchGroupId, false, true, lastSuccessfulBatch);
            if (batch != null) {
                // Found a non-empty pending batch, just break the loop and process it.
                ret = batch.getTuples();
                break;
            }
        }
        return ret;
    }

    /**
     * Discards all pending batches of a group, removing their spilled RocksDB
     * entries as well.
     */
    public synchronized void cleanup(int groupId) {
        Map<Long, PendingBatch> batches = pendingBatches.get(groupId);
        if (batches != null) {
            for (Entry<Long, PendingBatch> entry : batches.entrySet()) {
                PendingBatch batch = entry.getValue();
                batch.removeTuples();
            }
            batches.clear();
        }
    }

    // Values are already serialized byte arrays; pass them through unchanged.
    @Override
    protected byte[] serialize(Object data) {
        return (byte[]) data;
    }

    @Override
    protected Object deserialize(byte[] data) {
        return data;
    }

    @Override
    public String toString() {
        return "pendingBatches: " + pendingBatches.toString() + ", pendingBatchGroups: " + pendingBatchGroups;
    }
}