// Copyright 2016 Twitter. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.twitter.heron.metricscachemgr.metricscache;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import com.twitter.heron.common.basics.WakeableLooper;
import com.twitter.heron.common.config.SystemConfig;
import com.twitter.heron.metricscachemgr.metricscache.query.ExceptionDatum;
import com.twitter.heron.metricscachemgr.metricscache.query.ExceptionRequest;
import com.twitter.heron.metricscachemgr.metricscache.query.ExceptionResponse;
import com.twitter.heron.metricscachemgr.metricscache.query.MetricRequest;
import com.twitter.heron.metricscachemgr.metricscache.query.MetricResponse;
import com.twitter.heron.metricsmgr.MetricsSinksConfig;
import com.twitter.heron.proto.system.Common;
import com.twitter.heron.proto.tmaster.TopologyMaster;
import com.twitter.heron.spi.metricsmgr.metrics.MetricsFilter;
/**
* Interface for the cache core
* providing compatible interface with tmaster
* see heron/tmaster/src/cpp/manager/tmetrics-collector.h
*/
public class MetricsCache {
public static final String METRICS_SINKS_TMASTER_SINK = "tmaster-sink";
public static final String METRICS_SINKS_TMASTER_METRICS = "tmaster-metrics-type";
private static final Logger LOG = Logger.getLogger(CacheCore.class.getName());
private final CacheCore cache;
private final MetricsFilter metricNameType;
public MetricsCache(SystemConfig systemConfig, MetricsSinksConfig sinksConfig,
WakeableLooper looper) {
// metadata
metricNameType = new MetricsFilter();
Map<String, Object> sinksTMaster = sinksConfig.getConfigForSink(METRICS_SINKS_TMASTER_SINK);
@SuppressWarnings("unchecked")
Map<String, String> metricsTypes =
(Map<String, String>) sinksTMaster.get(METRICS_SINKS_TMASTER_METRICS);
for (String metricName : metricsTypes.keySet()) {
metricNameType.setMetricToType(metricName, translateFromString(metricsTypes.get(metricName)));
}
Duration maxInterval = systemConfig.getTmasterMetricsCollectorMaximumInterval();
Duration purgeInterval = systemConfig.getTmasterMetricsCollectorPurgeInterval();
long maxExceptions = systemConfig.getTmasterMetricsCollectorMaximumException();
cache = new CacheCore(maxInterval, purgeInterval, maxExceptions);
cache.startPurge(looper);
}
private static TopologyMaster.MetricResponse.Builder buildResponseNotOk(String message) {
TopologyMaster.MetricResponse.Builder builder =
TopologyMaster.MetricResponse.newBuilder();
builder.setStatus(Common.Status.newBuilder()
.setStatus(Common.StatusCode.NOTOK)
.setMessage(message));
return builder;
}
private MetricsFilter.MetricAggregationType translateFromString(String type) {
try {
return MetricsFilter.MetricAggregationType.valueOf(type);
} catch (IllegalArgumentException e) {
LOG.log(Level.SEVERE, "Unknown metrics type in metrics sinks " + type + "; " + e);
return MetricsFilter.MetricAggregationType.UNKNOWN;
}
}
/**
* sink publishes metrics and exceptions to this interface
*
* @param metrics message from sinks
*/
public void addMetrics(TopologyMaster.PublishMetrics metrics) {
cache.addMetricException(metrics);
}
/**
* for inside SLA process component query
*
* @param request query statement
* @return metric list
*/
public MetricResponse getMetrics(MetricRequest request) {
return cache.getMetrics(request, metricNameType);
}
/**
* for inside SLA process component query
*
* @param request query statement
* @return exception list
*/
public ExceptionResponse getExceptions(ExceptionRequest request) {
return cache.getExceptions(request);
}
/**
* compatible with tmaster interface
*
* @param request query request defined in protobuf
* @return query result defined in protobuf
*/
public TopologyMaster.ExceptionLogResponse getExceptions(
TopologyMaster.ExceptionLogRequest request) {
ExceptionRequest request1 = MetricsCacheQueryUtils.fromProtobuf(request);
ExceptionResponse response1 = cache.getExceptions(request1);
TopologyMaster.ExceptionLogResponse response = MetricsCacheQueryUtils.toProtobuf(response1);
return response;
}
private ExceptionResponse summarizeException(ExceptionResponse response1) {
Map<String, ExceptionDatum> exceptionSummary = new HashMap<>();
for (ExceptionDatum edp : response1.getExceptionDatapointList()) {
// Get classname by splitting on first colon
int pos = edp.getStackTrace().indexOf(':');
if (pos >= 0) {
String className = edp.getStackTrace().substring(0, pos);
if (!exceptionSummary.containsKey(className)) {
exceptionSummary.put(className,
new ExceptionDatum(edp.getComponentName(), edp.getInstanceId(), edp.getHostname(),
className, edp.getLastTime(), edp.getFirstTime(),
edp.getCount(), edp.getLogging()));
} else {
ExceptionDatum edp3 = exceptionSummary.get(className);
// update count and time
int count = edp3.getCount() + edp.getCount();
String firstTime = edp3.getFirstTime();
String lastTime = edp.getLastTime(); // should assure the time ?
// put it back in summary
exceptionSummary.put(className,
new ExceptionDatum(edp3.getComponentName(), edp3.getInstanceId(), edp3.getHostname(),
edp3.getStackTrace(), lastTime, firstTime, count, edp3.getLogging()));
}
}
}
ExceptionResponse ret = new ExceptionResponse(exceptionSummary.values());
return ret;
}
/**
* compatible with tmaster interface
*
* @param request query statement defined in protobuf
* @return query result defined in protobuf
*/
public TopologyMaster.ExceptionLogResponse getExceptionsSummary(
TopologyMaster.ExceptionLogRequest request) {
ExceptionRequest request1 = MetricsCacheQueryUtils.fromProtobuf(request);
ExceptionResponse response1 = cache.getExceptions(request1);
ExceptionResponse response2 = summarizeException(response1);
TopologyMaster.ExceptionLogResponse response = MetricsCacheQueryUtils.toProtobuf(response2);
return response;
}
/**
* compatible with tmaster interface
*
* @param request query statement defined in protobuf
* @return query result defined in protobuf
*/
public TopologyMaster.MetricResponse getMetrics(TopologyMaster.MetricRequest request) {
String componentName = request.getComponentName();
if (!cache.componentInstanceExists(componentName, null)) {
return buildResponseNotOk(
String.format("Unknown component %s found in MetricRequest %s", componentName, request)
).build();
}
if (request.getInstanceIdCount() > 0) {
for (String instanceId : request.getInstanceIdList()) {
if (!cache.componentInstanceExists(componentName, instanceId)) {
return buildResponseNotOk(
String.format("Unknown instance %s found in MetricRequest %s", instanceId, request)
).build();
}
}
}
if (!request.hasInterval() && !request.hasExplicitInterval()) {
return buildResponseNotOk("No purgeIntervalSec or explicit purgeIntervalSec set").build();
}
MetricRequest request1 = MetricsCacheQueryUtils.fromProtobuf(request);
MetricResponse response1 = cache.getMetrics(request1, metricNameType);
TopologyMaster.MetricResponse response = MetricsCacheQueryUtils.toProtobuf(response1, request1);
return response;
}
}