// Copyright 2016 Twitter. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.twitter.heron.metricsmgr.sink;

import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.scribe.LogEntry;
import org.apache.scribe.ResultCode;
import org.apache.scribe.scribe;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.transport.TFramedTransport;
import org.apache.thrift.transport.TSocket;

import com.twitter.heron.common.basics.SysUtils;
import com.twitter.heron.common.basics.TypeUtils;
import com.twitter.heron.spi.metricsmgr.metrics.MetricsInfo;
import com.twitter.heron.spi.metricsmgr.metrics.MetricsRecord;
import com.twitter.heron.spi.metricsmgr.sink.IMetricsSink;
import com.twitter.heron.spi.metricsmgr.sink.SinkContext;

/**
 * A metrics sink that writes to Scribe with the JSON format required by Twitter Cuckoo.
 *
 * <p>Records are logged synchronously in {@link #processRecord(MetricsRecord)}; {@link #flush()}
 * only exports the internal success/failure counters through the {@link SinkContext}.
 *
 * <p>NOTE(review): this class is not thread-safe; it assumes the metrics manager drives each
 * sink instance from a single thread — confirm against the metrics manager's threading model.
 */
public class ScribeSink implements IMetricsSink {
  private static final Logger LOG = Logger.getLogger(ScribeSink.class.getName());

  // Keys into the internal counters map
  private static final String MESSAGE = "message";
  private static final String OK = "ok";
  private static final String TRY_AGAIN = "try_again";
  private static final String FAILED = "failed";

  // These configs are read from sink-configs.yaml
  private static final String KEY_SCRIBE_HOST = "scribe-host";
  private static final String KEY_SCRIBE_PORT = "scribe-port";
  private static final String KEY_CATEGORY = "scribe-category";
  private static final String KEY_SERVICE_NAMESPACE = "service-namespace";
  private static final String KEY_SCRIBE_TIMEOUT_MS = "scribe-timeout-ms";
  private static final String KEY_SCRIBE_CONNECT_SERVER_ATTEMPTS = "scribe-connect-server-attempts";
  private static final String KEY_SCRIBE_RETRY_ATTEMPTS = "scribe-retry-attempts";
  private static final String KEY_SCRIBE_RETRY_INTERVAL_MS = "scribe-retry-interval-ms";

  // Names of the metrics this sink exports through the SinkContext
  private static final String FLUSH_COUNT = "flush-count";
  private static final String RECORD_PROCESS_COUNT = "record-process-count";
  private static final String MESSAGE_COUNT = "message-count";
  private static final String OK_COUNT = "ok-count";
  private static final String TRY_AGAIN_COUNT = "try-again-count";
  private static final String FAILED_COUNT = "failed-count";
  private static final String ILLEGAL_METRICS_COUNT = "illegal-metrics-count";

  // ObjectMapper is thread-safe and expensive to construct; share one per class.
  private static final ObjectMapper MAPPER = new ObjectMapper();

  // FIX: these counters are per-sink state. The original declared them as a
  // `private static final Map`, so multiple ScribeSink instances in the same
  // process would share — and corrupt — each other's counts, and init() would
  // reset them globally. An instance field keeps the external behavior for a
  // single sink identical while isolating instances from each other.
  private final Map<String, Long> counters = new HashMap<String, Long>();

  // The SinkConfig for ScribeSink, parsed from sink-configs.yaml
  private Map<String, Object> config;
  private TFramedTransport transport;
  private scribe.Client client;
  private SinkContext sinkContext;
  private String topologyName;
  private int connectRetryAttempts;

  @Override
  public void init(Map<String, Object> conf, SinkContext context) {
    // Initialize the counters
    counters.put(MESSAGE, 0L);
    counters.put(OK, 0L);
    counters.put(TRY_AGAIN, 0L);
    counters.put(FAILED, 0L);

    config = conf;
    sinkContext = context;
    topologyName = context.getTopologyName();
    connectRetryAttempts =
        TypeUtils.getInteger(config.get(KEY_SCRIBE_CONNECT_SERVER_ATTEMPTS));

    // Open the TTransport connection and client to the scribe server
    open();
  }

  @Override
  public void processRecord(MetricsRecord record) {
    incrementCounter(MESSAGE, 1);

    // If the TSocket is not open, try to (re)open it before logging.
    if (!transport.isOpen() && !open()) {
      incrementCounter(FAILED, 1);
      LOG.severe("Failed due to TTransport is not open");
      if (counters.get(FAILED) >= connectRetryAttempts) {
        throw new RuntimeException("The scribe sink failed to connect to server; exceeds "
            + connectRetryAttempts + " attempts");
      }
      return;
    }

    // Convert MetricsRecord to the Twitter Cuckoo format
    LogEntry logEntry = new LogEntry();
    logEntry.category = (String) config.get(KEY_CATEGORY);
    logEntry.message = makeJSON(record);
    if (LOG.isLoggable(Level.FINE)) {
      // FIX: added the missing separator after "Scribe"; the level guard also
      // avoids building the concatenated string when FINE logging is disabled.
      LOG.fine("Metrics to log to Scribe: " + logEntry.message);
    }

    List<LogEntry> pendingEntries = new LinkedList<LogEntry>();
    pendingEntries.add(logEntry);

    // Log to Scribe with retry
    logToScribe(pendingEntries);

    sinkContext.exportCountMetric(RECORD_PROCESS_COUNT, 1);
  }

  @Override
  public void flush() {
    // We log to scribe directly in processRecord(...), so only the counters
    // need flushing here.
    flushCounters();

    sinkContext.exportCountMetric(FLUSH_COUNT, 1);
    sinkContext.exportCountMetric(MESSAGE_COUNT, counters.get(MESSAGE).intValue());
    sinkContext.exportCountMetric(OK_COUNT, counters.get(OK).intValue());
    sinkContext.exportCountMetric(TRY_AGAIN_COUNT, counters.get(TRY_AGAIN).intValue());
    sinkContext.exportCountMetric(FAILED_COUNT, counters.get(FAILED).intValue());
  }

  @Override
  public void close() {
    LOG.info("Closing ScribeSink");
    transport.close();
    flushCounters();
  }

  // Add delta to the named counter; the key must have been seeded in init().
  private void incrementCounter(String key, long delta) {
    counters.put(key, counters.get(key) + delta);
  }

  /**
   * Opens the TTransport connection and client to the scribe server.
   *
   * @return true iff the connection was established
   */
  private boolean open() {
    try {
      TSocket socket = new TSocket((String) config.get(KEY_SCRIBE_HOST),
          TypeUtils.getInteger(config.get(KEY_SCRIBE_PORT)),
          TypeUtils.getInteger(config.get(KEY_SCRIBE_TIMEOUT_MS)));
      transport = new TFramedTransport(socket);
      transport.open();
    } catch (TException tx) {
      LOG.log(Level.SEVERE, "Failed to open connection to scribe server " + connectionString(), tx);
      return false;
    }
    LOG.info("Opened connection to scribe server " + connectionString());

    TProtocol protocol = new TBinaryProtocol(transport);
    client = new scribe.Client(protocol);
    return true;
  }

  // Log the entries to scribe, retrying up to the configured number of attempts.
  // On exhaustion or transport failure, counts a failure and closes the
  // connection so the next processRecord(...) re-opens a fresh one.
  private void logToScribe(List<LogEntry> pendingEntries) {
    int retryAttempts = TypeUtils.getInteger(config.get(KEY_SCRIBE_RETRY_ATTEMPTS));
    Duration retryInterval =
        TypeUtils.getDuration(config.get(KEY_SCRIBE_RETRY_INTERVAL_MS), ChronoUnit.MILLIS);

    try {
      for (int attempt = 0; attempt < retryAttempts; attempt++) {
        ResultCode result = client.Log(pendingEntries);

        // If successful, we are done
        if (result.equals(ResultCode.OK)) {
          incrementCounter(OK, pendingEntries.size());
          return;
        }

        // Otherwise, try once more
        if (result.equals(ResultCode.TRY_LATER)) {
          incrementCounter(TRY_AGAIN, 1);
        }

        // Sleep a while to avoid hitting the scribe server heavily
        SysUtils.sleep(retryInterval);
      }
    } catch (TException te) {
      LOG.log(Level.SEVERE, "Message sending failed due to TransportException: ", te);
    }

    incrementCounter(FAILED, 1);
    close();
  }

  /**
   * Converts a record into the JSON format required by Twitter Cuckoo.
   *
   * @param record the record to serialize
   * @return the JSON string, or "" if serialization failed
   */
  private String makeJSON(MetricsRecord record) {
    String serviceName =
        String.format("%s/%s", config.get(KEY_SERVICE_NAMESPACE), topologyName);

    // The source format is "host:port/componentName/instanceId";
    // Cuckoo wants just "/componentName/instanceId".
    String[] sources = record.getSource().split("/");
    String source;
    if (sources.length >= 3) {
      source = String.format("/%s/%s", sources[1], sources[2]);
    } else {
      // FIX: the original threw ArrayIndexOutOfBoundsException on a source
      // without the expected two '/' separators; fall back to the raw source
      // so one malformed record cannot kill the sink.
      LOG.severe("Unexpected source format: " + record.getSource());
      source = "/" + record.getSource();
    }

    // The timestamp is in ms; Twitter infra expects seconds.
    long timestamp = Duration.ofMillis(record.getTimestamp()).getSeconds();

    Map<String, Object> json = new HashMap<String, Object>();
    json.put("service", serviceName);
    json.put("source", source);
    json.put("timestamp", timestamp);

    // Cuckoo_json allows multiple metrics in a single JSON document, so
    // package all metrics received into one.
    int metricsToWrite = 0;
    for (MetricsInfo info : record.getMetrics()) {
      // Scribe only accepts Double metric values; skip anything unparsable.
      Double val;
      try {
        val = Double.valueOf(info.getValue());
      } catch (NumberFormatException ne) {
        LOG.log(Level.SEVERE, "Could not parse illegal metric: " + info.toString());
        sinkContext.exportCountMetric(ILLEGAL_METRICS_COUNT, 1);
        continue;
      }

      json.put(info.getName(), val);
      metricsToWrite++;
    }
    LOG.info(metricsToWrite + " metrics added");

    String result = "";
    try {
      result = MAPPER.writeValueAsString(json);
    } catch (JsonProcessingException e) {
      LOG.log(Level.SEVERE, "Could not convert map to JSONString: " + json.toString(), e);
    }
    return result;
  }

  // "<host:port>" string used in connection log messages.
  private String connectionString() {
    return String.format("<%s:%d>",
        config.get(KEY_SCRIBE_HOST), TypeUtils.getInteger(config.get(KEY_SCRIBE_PORT)));
  }

  private void flushCounters() {
    LOG.info(counters.toString());
  }
}