package storm.kafka.trident;

import java.net.ConnectException;
import java.util.*;

import backtype.storm.metric.api.CombinedMetric;
import backtype.storm.metric.api.IMetric;
import backtype.storm.metric.api.ReducedMetric;
import backtype.storm.utils.Utils;
import com.google.common.collect.ImmutableMap;
import kafka.api.FetchRequest;
import kafka.api.OffsetRequest;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.javaapi.message.ByteBufferMessageSet;
import kafka.message.Message;
import kafka.message.MessageAndOffset;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import storm.kafka.DynamicPartitionConnections;
import storm.kafka.GlobalPartitionId;
import storm.kafka.HostPort;
import storm.kafka.KafkaConfig.StaticHosts;
import storm.kafka.KafkaConfig.ZkHosts;
import storm.trident.operation.TridentCollector;

public class KafkaUtils {
    public static final Logger LOG = LoggerFactory.getLogger(KafkaUtils.class);

    public static IBrokerReader makeBrokerReader(Map stormConf, TridentKafkaConfig conf) {
        if (conf.hosts instanceof StaticHosts) {
            return new StaticBrokerReader((StaticHosts) conf.hosts);
        } else {
            return new ZkBrokerReader(stormConf, conf.topic, (ZkHosts) conf.hosts);
        }
    }

    public static List<GlobalPartitionId> getOrderedPartitions(Map<String, List> partitions) {
        List<GlobalPartitionId> ret = new ArrayList();
        // Iterate hosts in sorted order so every task derives the same partition ordering.
        for (String host : new TreeMap<String, List>(partitions).keySet()) {
            List info = partitions.get(host);
            long port = (Long) info.get(0);
            long numPartitions = (Long) info.get(1);
            HostPort hp = new HostPort(host, (int) port);
            for (int i = 0; i < numPartitions; i++) {
                ret.add(new GlobalPartitionId(hp, i));
            }
        }
        return ret;
    }

    public static Map emitPartitionBatchNew(TridentKafkaConfig config, SimpleConsumer consumer,
                                            GlobalPartitionId partition, TridentCollector collector,
                                            Map lastMeta, String topologyInstanceId, String topologyName,
                                            ReducedMetric meanMetric, CombinedMetric maxMetric) {
        long offset;
        if (lastMeta != null) {
            String lastInstanceId = null;
            Map lastTopoMeta = (Map) lastMeta.get("topology");
            if (lastTopoMeta != null) {
                lastInstanceId = (String) lastTopoMeta.get("id");
            }
            if (config.forceFromStart && !topologyInstanceId.equals(lastInstanceId)) {
                // Fresh deployment with forceFromStart set: restart from the configured offset time.
                offset = consumer.getOffsetsBefore(config.topic, partition.partition, config.startOffsetTime, 1)[0];
            } else {
                // Otherwise continue where the previous batch left off.
                offset = (Long) lastMeta.get("nextOffset");
            }
        } else {
            long startTime = -1;
            if (config.forceFromStart) startTime = config.startOffsetTime;
            offset = consumer.getOffsetsBefore(config.topic, partition.partition, startTime, 1)[0];
        }

        ByteBufferMessageSet msgs;
        try {
            // Time the fetch and record it in the mean/max fetch-latency metrics.
            long start = System.nanoTime();
            msgs = consumer.fetch(new FetchRequest(config.topic, partition.partition, offset, config.fetchSizeBytes));
            long end = System.nanoTime();
            long millis = (end - start) / 1000000;
            meanMetric.update(millis);
            maxMetric.update(millis);
        } catch (Exception e) {
            if (e instanceof ConnectException) {
                throw new FailedFetchException(e);
            } else {
                throw new RuntimeException(e);
            }
        }

        // Emit the batch and track the offset to resume from on the next batch.
        long endoffset = offset;
        for (MessageAndOffset msg : msgs) {
            emit(config, collector, msg.message());
            endoffset = msg.offset();
        }

        // Batch metadata: "offset" is where this batch started, "nextOffset" is where the next one begins.
        Map newMeta = new HashMap();
        newMeta.put("offset", offset);
        newMeta.put("nextOffset", endoffset);
        newMeta.put("instanceId", topologyInstanceId);
        newMeta.put("partition", partition.partition);
        newMeta.put("broker", ImmutableMap.of("host", partition.host.host, "port", partition.host.port));
        newMeta.put("topic", config.topic);
        newMeta.put("topology", ImmutableMap.of("name", topologyName, "id", topologyInstanceId));
        return newMeta;
    }

    public static void emit(TridentKafkaConfig config, TridentCollector collector, Message msg) {
        Iterable<List<Object>> values = config.scheme.deserialize(Utils.toByteArray(msg.payload()));
        if (values != null) {
            for (List<Object> value : values) {
                collector.emit(value);
            }
        }
    }

    /**
     * Reports per-partition and total spout lag: the latest broker offset minus the
     * latest offset this spout has emitted.
     */
    public static class KafkaOffsetMetric implements IMetric {
        Map<GlobalPartitionId, Long> _partitionToOffset = new HashMap<GlobalPartitionId, Long>();
        Set<GlobalPartitionId> _partitions;
        String _topic;
        DynamicPartitionConnections _connections;

        public KafkaOffsetMetric(String topic, DynamicPartitionConnections connections) {
            _topic = topic;
            _connections = connections;
        }

        public void setLatestEmittedOffset(GlobalPartitionId partition, long offset) {
            _partitionToOffset.put(partition, offset);
        }

        @Override
        public Object getValueAndReset() {
            try {
                long totalSpoutLag = 0;
                long totalLatestTimeOffset = 0;
                long totalLatestEmittedOffset = 0;
                HashMap ret = new HashMap();
                if (_partitions != null && _partitions.size() == _partitionToOffset.size()) {
                    for (Map.Entry<GlobalPartitionId, Long> e : _partitionToOffset.entrySet()) {
                        GlobalPartitionId partition = e.getKey();
                        SimpleConsumer consumer = _connections.getConnection(partition);
                        if (consumer == null) {
                            LOG.warn("partitionToOffset contains partition not found in _connections. Stale partition data?");
                            return null;
                        }
                        long latestTimeOffset = consumer.getOffsetsBefore(_topic, partition.partition, OffsetRequest.LatestTime(), 1)[0];
                        if (latestTimeOffset == 0) {
                            LOG.warn("No data found in Kafka Partition " + partition.getId());
                            return null;
                        }
                        long latestEmittedOffset = (Long) e.getValue();
                        long spoutLag = latestTimeOffset - latestEmittedOffset;
                        ret.put(partition.getId() + "/" + "spoutLag", spoutLag);
                        ret.put(partition.getId() + "/" + "latestTime", latestTimeOffset);
                        ret.put(partition.getId() + "/" + "latestEmittedOffset", latestEmittedOffset);
                        totalSpoutLag += spoutLag;
                        totalLatestTimeOffset += latestTimeOffset;
                        totalLatestEmittedOffset += latestEmittedOffset;
                    }
                    ret.put("totalSpoutLag", totalSpoutLag);
                    ret.put("totalLatestTime", totalLatestTimeOffset);
                    ret.put("totalLatestEmittedOffset", totalLatestEmittedOffset);
                    return ret;
                } else {
                    LOG.info("Metrics Tick: Not enough data to calculate spout lag.");
                }
            } catch (Throwable t) {
                LOG.warn("Metrics Tick: Exception when computing kafkaOffset metric.", t);
            }
            return null;
        }

        public void refreshPartitions(Set<GlobalPartitionId> partitions) {
            _partitions = partitions;
            // Drop offsets for partitions this task no longer owns.
            Iterator<GlobalPartitionId> it = _partitionToOffset.keySet().iterator();
            while (it.hasNext()) {
                if (!partitions.contains(it.next())) it.remove();
            }
        }
    }
}