package storm.kafka; import backtype.storm.Config; import backtype.storm.metric.api.*; import backtype.storm.spout.SpoutOutputCollector; import backtype.storm.utils.Utils; import com.google.common.collect.ImmutableMap; import java.util.*; import kafka.api.FetchRequest; import kafka.api.OffsetRequest; import kafka.javaapi.consumer.SimpleConsumer; import kafka.javaapi.message.ByteBufferMessageSet; import kafka.message.MessageAndOffset; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import storm.kafka.KafkaSpout.EmitState; import storm.kafka.KafkaSpout.MessageAndRealOffset; import storm.kafka.trident.MaxMetric; public class PartitionManager { public static final Logger LOG = LoggerFactory.getLogger(PartitionManager.class); private final CombinedMetric _fetchAPILatencyMax; private final ReducedMetric _fetchAPILatencyMean; private final CountMetric _fetchAPICallCount; private final CountMetric _fetchAPIMessageCount; static class KafkaMessageId { public GlobalPartitionId partition; public long offset; public KafkaMessageId(GlobalPartitionId partition, long offset) { this.partition = partition; this.offset = offset; } } Long _emittedToOffset; SortedSet<Long> _pending = new TreeSet<Long>(); Long _committedTo; LinkedList<MessageAndRealOffset> _waitingToEmit = new LinkedList<MessageAndRealOffset>(); GlobalPartitionId _partition; SpoutConfig _spoutConfig; String _topologyInstanceId; SimpleConsumer _consumer; DynamicPartitionConnections _connections; ZkState _state; Map _stormConf; public PartitionManager(DynamicPartitionConnections connections, String topologyInstanceId, ZkState state, Map stormConf, SpoutConfig spoutConfig, GlobalPartitionId id) { _partition = id; _connections = connections; _spoutConfig = spoutConfig; _topologyInstanceId = topologyInstanceId; _consumer = connections.register(id.host, id.partition); _state = state; _stormConf = stormConf; String jsonTopologyId = null; Long jsonOffset = null; try { Map<Object, Object> json = _state.readJSON(committedPath()); if(json != null) { jsonTopologyId = (String)((Map<Object,Object>)json.get("topology")).get("id"); jsonOffset = (Long)json.get("offset"); } } catch(Throwable e) { LOG.warn("Error reading and/or parsing at ZkNode: " + committedPath(), e); } if(!topologyInstanceId.equals(jsonTopologyId) && spoutConfig.forceFromStart) { _committedTo = _consumer.getOffsetsBefore(spoutConfig.topic, id.partition, spoutConfig.startOffsetTime, 1)[0]; LOG.info("Using startOffsetTime to choose last commit offset."); } else if(jsonTopologyId == null || jsonOffset == null) { // failed to parse JSON? _committedTo = _consumer.getOffsetsBefore(spoutConfig.topic, id.partition, -1, 1)[0]; LOG.info("Setting last commit offset to HEAD."); } else { _committedTo = jsonOffset; LOG.info("Read last commit offset from zookeeper: " + _committedTo); } LOG.info("Starting Kafka " + _consumer.host() + ":" + id.partition + " from offset " + _committedTo); _emittedToOffset = _committedTo; _fetchAPILatencyMax = new CombinedMetric(new MaxMetric()); _fetchAPILatencyMean = new ReducedMetric(new MeanReducer()); _fetchAPICallCount = new CountMetric(); _fetchAPIMessageCount = new CountMetric(); } public Map getMetricsDataMap() { Map ret = new HashMap(); ret.put(_partition + "/fetchAPILatencyMax", _fetchAPILatencyMax.getValueAndReset()); ret.put(_partition + "/fetchAPILatencyMean", _fetchAPILatencyMean.getValueAndReset()); ret.put(_partition + "/fetchAPICallCount", _fetchAPICallCount.getValueAndReset()); ret.put(_partition + "/fetchAPIMessageCount", _fetchAPIMessageCount.getValueAndReset()); return ret; } //returns false if it's reached the end of current batch public EmitState next(SpoutOutputCollector collector) { if(_waitingToEmit.isEmpty()) fill(); while(true) { MessageAndRealOffset toEmit = _waitingToEmit.pollFirst(); if(toEmit==null) { return EmitState.NO_EMITTED; } Iterable<List<Object>> tups = _spoutConfig.scheme.deserialize(Utils.toByteArray(toEmit.msg.payload())); if(tups!=null) { for(List<Object> tup: tups) collector.emit(tup, new KafkaMessageId(_partition, toEmit.offset)); break; } else { ack(toEmit.offset); } } if(!_waitingToEmit.isEmpty()) { return EmitState.EMITTED_MORE_LEFT; } else { return EmitState.EMITTED_END; } } private void fill() { //LOG.info("Fetching from Kafka: " + _consumer.host() + ":" + _partition.partition + " from offset " + _emittedToOffset); long start = System.nanoTime(); ByteBufferMessageSet msgs = _consumer.fetch( new FetchRequest( _spoutConfig.topic, _partition.partition, _emittedToOffset, _spoutConfig.fetchSizeBytes)); long end = System.nanoTime(); long millis = (end - start) / 1000000; _fetchAPILatencyMax.update(millis); _fetchAPILatencyMean.update(millis); _fetchAPICallCount.incr(); _fetchAPIMessageCount.incrBy(msgs.underlying().size()); int numMessages = msgs.underlying().size(); if(numMessages>0) { LOG.info("Fetched " + numMessages + " messages from Kafka: " + _consumer.host() + ":" + _partition.partition); } for(MessageAndOffset msg: msgs) { _pending.add(_emittedToOffset); _waitingToEmit.add(new MessageAndRealOffset(msg.message(), _emittedToOffset)); _emittedToOffset = msg.offset(); } if(numMessages>0) { LOG.info("Added " + numMessages + " messages from Kafka: " + _consumer.host() + ":" + _partition.partition + " to internal buffers"); } } public void ack(Long offset) { _pending.remove(offset); } public void fail(Long offset) { //TODO: should it use in-memory ack set to skip anything that's been acked but not committed??? // things might get crazy with lots of timeouts if(_emittedToOffset > offset) { _emittedToOffset = offset; _pending.tailSet(offset).clear(); } } public void commit() { LOG.info("Committing offset for " + _partition); long committedTo; if(_pending.isEmpty()) { committedTo = _emittedToOffset; } else { committedTo = _pending.first(); } if(committedTo!=_committedTo) { LOG.info("Writing committed offset to ZK: " + committedTo); Map<Object, Object> data = (Map<Object,Object>)ImmutableMap.builder() .put("topology", ImmutableMap.of("id", _topologyInstanceId, "name", _stormConf.get(Config.TOPOLOGY_NAME))) .put("offset", committedTo) .put("partition", _partition.partition) .put("broker", ImmutableMap.of("host", _partition.host.host, "port", _partition.host.port)) .put("topic", _spoutConfig.topic).build(); _state.writeJSON(committedPath(), data); LOG.info("Wrote committed offset to ZK: " + committedTo); _committedTo = committedTo; } LOG.info("Committed offset " + committedTo + " for " + _partition); } private String committedPath() { return _spoutConfig.zkRoot + "/" + _spoutConfig.id + "/" + _partition; } public long queryPartitionOffsetLatestTime() { return _consumer.getOffsetsBefore(_spoutConfig.topic, _partition.partition, OffsetRequest.LatestTime(), 1)[0]; } public long lastCommittedOffset() { return _committedTo; } public long lastCompletedOffset() { if(_pending.isEmpty()) { return _emittedToOffset; } else { return _pending.first(); } } public GlobalPartitionId getPartition() { return _partition; } public void close() { _connections.unregister(_partition.host, _partition.partition); } }