/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.alibaba.jstorm.metric;
import com.alibaba.jstorm.config.Refreshable;
import com.alibaba.jstorm.config.RefreshableComponents;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alibaba.jstorm.callback.AsyncLoopThread;
import com.alibaba.jstorm.callback.RunnableCallback;
import com.alibaba.jstorm.client.ConfigExtension;
import com.alibaba.jstorm.cluster.Common;
import com.alibaba.jstorm.cluster.StormConfig;
import com.alibaba.jstorm.common.metric.AsmHistogram;
import com.alibaba.jstorm.common.metric.AsmMetric;
import com.alibaba.jstorm.daemon.nimbus.NimbusData;
import com.alibaba.jstorm.daemon.supervisor.SupervisorManger;
import com.alibaba.jstorm.daemon.worker.WorkerData;
import com.alibaba.jstorm.task.execute.BoltCollector;
import com.alibaba.jstorm.task.execute.spout.SpoutCollector;
import com.alibaba.jstorm.utils.JStormServerUtils;
import com.alibaba.jstorm.utils.TimeUtils;
import com.google.common.annotations.VisibleForTesting;
import backtype.storm.Config;
import backtype.storm.generated.MetricInfo;
import backtype.storm.generated.TopologyMetric;
import backtype.storm.generated.WorkerUploadMetrics;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.tuple.Values;
import backtype.storm.utils.NimbusClientWrapper;
/**
* Reports metrics to the nimbus server. An instance of this class lives inside a Worker, Nimbus or Supervisor.
* When in a Worker, it reports data via netty transport (to the topology master first);
* otherwise it reports via thrift to nimbus directly.
* <p/>
* There are 2 threads:
* 1. flush thread: checks every 1 sec; when the current time is aligned to 1 min, flushes all metrics to snapshots
* 2. check meta thread: registers metrics to obtain their metric ids (via the nimbus client, or via the topology master when in a Worker).
*
* @author Cody (weiyue.wy@alibaba-inc.com)
* @since 2.0.5
*/
public class JStormMetricsReporter implements Refreshable {
    private static final Logger LOG = LoggerFactory.getLogger(JStormMetricsReporter.class);

    /** Value returned by {@link CheckMetricMetaThread#getResult()}; logged as the check-meta frequency in seconds. */
    private static final int CHECK_META_CYCLE_SECS = 20;
    /** Value returned by {@link FlushMetricThread#getResult()}; logged as the flush frequency in seconds. */
    private static final int FLUSH_METRIC_CYCLE_SECS = 1;

    /** Configuration captured at construction time; {@code null} for the test-only constructor. */
    private Map conf;

    protected String clusterName;
    protected String topologyId;
    protected String host;
    protected int port;
    protected boolean localMode = false;

    private AsyncLoopThread checkMetricMetaThread;
    protected final int checkMetaThreadCycle;

    private AsyncLoopThread flushMetricThread;
    protected final int flushMetricThreadCycle;

    /** Set only by the test constructor; makes registration and upload no-ops. */
    private boolean test = false;

    /** {@code true} when constructed from a {@link WorkerData}, i.e. running inside a topology worker. */
    private boolean inTopology = false;

    // Collectors are injected later through setOutputCollector(), possibly from another thread.
    private volatile SpoutOutputCollector spoutOutput;
    private volatile OutputCollector boltOutput;

    /** Lazily created thrift client, used only when not running inside a topology worker. */
    private NimbusClientWrapper client = null;
    private MetricsRegister metricsRegister;

    /**
     * Creates a reporter for the given owner.
     *
     * @param role the owning component; must be a {@link WorkerData}, {@link NimbusData}
     *             or {@link SupervisorManger}
     * @throws IllegalArgumentException if {@code role} is none of the supported types
     */
    public JStormMetricsReporter(Object role) {
        LOG.info("starting jstorm metrics reporter in {}", role.getClass().getSimpleName());
        if (role instanceof WorkerData) {
            WorkerData workerData = (WorkerData) role;
            this.conf = workerData.getStormConf();
            this.topologyId = (String) conf.get(Config.TOPOLOGY_ID);
            this.port = workerData.getPort();
            this.inTopology = true;
        } else if (role instanceof NimbusData) {
            NimbusData nimbusData = (NimbusData) role;
            this.conf = nimbusData.getConf();
            this.topologyId = JStormMetrics.NIMBUS_METRIC_KEY;
        } else if (role instanceof SupervisorManger) {
            SupervisorManger supervisor = (SupervisorManger) role;
            this.conf = supervisor.getConf();
            this.topologyId = JStormMetrics.SUPERVISOR_METRIC_KEY;
            JStormMetrics.setTopologyId(this.topologyId);
        } else {
            // Without a recognised role there is no configuration to work with; fail with a
            // clear message instead of passing a null conf on to refresh().
            throw new IllegalArgumentException(
                    "unsupported role for JStormMetricsReporter: " + role.getClass().getName());
        }

        // update metrics config
        refresh(conf);

        this.metricsRegister = new MetricsRegister(conf, topologyId);
        this.host = JStormMetrics.getHost();
        if (!JStormMetrics.enabled) {
            LOG.warn("***** topology metrics is disabled! *****");
        } else {
            LOG.info("topology metrics is enabled.");
        }

        this.checkMetaThreadCycle = CHECK_META_CYCLE_SECS;
        // flush metric snapshots when time is aligned, check every sec.
        this.flushMetricThreadCycle = FLUSH_METRIC_CYCLE_SECS;
        LOG.info("check meta thread freq: {} sec, flush metrics thread freq: {} sec",
                checkMetaThreadCycle, flushMetricThreadCycle);

        this.localMode = StormConfig.local_mode(conf);
        this.clusterName = ConfigExtension.getClusterName(conf);
        RefreshableComponents.registerRefreshable(this);
        LOG.info("done.");
    }

    /**
     * Test-only constructor: no configuration, no metrics register and no refresh registration.
     * Registration and upload become no-ops because {@code test} is set.
     */
    @VisibleForTesting
    JStormMetricsReporter() {
        LOG.info("Successfully started jstorm metrics reporter for test.");
        this.test = true;
        this.flushMetricThreadCycle = FLUSH_METRIC_CYCLE_SECS;
        this.checkMetaThreadCycle = CHECK_META_CYCLE_SECS;
    }

    /**
     * Creates the check-meta and flush loop threads, but only if metrics are enabled at the
     * time of the call. If metrics are enabled later through a config refresh, no threads are
     * created by this class.
     */
    public void init() {
        if (JStormMetrics.enabled) {
            this.checkMetricMetaThread = new AsyncLoopThread(new CheckMetricMetaThread());
            this.flushMetricThread = new AsyncLoopThread(new FlushMetricThread());
        }
    }

    /**
     * Cleans up whichever loop threads were actually created by {@link #init()}.
     * <p/>
     * The decision is based on the thread references rather than on {@code JStormMetrics.enabled},
     * because that flag can be flipped at runtime by {@link #updateMetricConfig(Map)} and therefore
     * does not tell whether the threads exist.
     */
    public void shutdown() {
        if (checkMetricMetaThread != null) {
            checkMetricMetaThread.cleanup();
            checkMetricMetaThread = null;
        }
        if (flushMetricThread != null) {
            flushMetricThread.cleanup();
            flushMetricThread = null;
        }
    }

    /**
     * Registers the given metric names through the {@link MetricsRegister}.
     *
     * @param names metric names to register
     * @return the result of the register call (an empty map in test mode)
     */
    private Map<String, Long> registerMetrics(Set<String> names) {
        if (test) {
            return new HashMap<>();
        }
        return metricsRegister.registerMetrics(names);
    }

    /**
     * Computes all metrics of this process and uploads them when there is at least one metric.
     * Any exception is logged and swallowed so that the calling loop keeps running.
     */
    public void uploadMetricData() {
        if (test) {
            return;
        }
        try {
            long start = System.currentTimeMillis();
            MetricInfo workerMetricInfo = MetricUtils.metricAccurateCal
                    ? JStormMetrics.computeAllMetrics()
                    : JStormMetrics.approximateComputeAllMetrics();

            if (workerMetricInfo.get_metrics_size() > 0) {
                WorkerUploadMetrics upload = new WorkerUploadMetrics();
                upload.set_topologyId(topologyId);
                upload.set_supervisorId(host);
                upload.set_port(port);
                upload.set_allMetrics(workerMetricInfo);

                uploadMetricData(upload);
                LOG.debug("Successfully upload worker metrics, size:{}, cost:{}",
                        workerMetricInfo.get_metrics_size(), System.currentTimeMillis() - start);
            } else {
                LOG.debug("No metrics to upload.");
            }
        } catch (Exception e) {
            LOG.error("Failed to upload worker metrics", e);
        }
    }

    /**
     * Uploads the given metrics: through the output collector when running in a topology worker,
     * otherwise through the nimbus thrift client.
     *
     * @param metrics the metrics payload to send
     */
    public void uploadMetricData(WorkerUploadMetrics metrics) {
        if (inTopology) {
            emitMetricsToTopologyMaster(metrics);
        } else {
            uploadMetricsViaNimbusClient(metrics);
        }
    }

    /** In a worker: emits the metrics on the topology-master metrics stream via the bolt or spout collector. */
    private void emitMetricsToTopologyMaster(WorkerUploadMetrics metrics) {
        if (boltOutput != null) {
            LOG.debug("emit metrics through bolt collector.");
            ((BoltCollector) boltOutput.getDelegate()).emitCtrl(Common.TOPOLOGY_MASTER_METRICS_STREAM_ID, null,
                    new Values(JStormServerUtils.getName(host, port), metrics));
        } else if (spoutOutput != null) {
            LOG.debug("emit metrics through spout collector.");
            ((SpoutCollector) spoutOutput.getDelegate()).emitCtrl(Common.TOPOLOGY_MASTER_METRICS_STREAM_ID,
                    new Values(JStormServerUtils.getName(host, port), metrics), null);
        } else {
            LOG.warn("topology:{}, both spout/bolt collectors are null, don't know what to do...", topologyId);
        }
    }

    /**
     * In supervisor or nimbus: pushes the metrics via the nimbus thrift client, creating the client on
     * first use. On any failure the client is cleaned up and dropped so the next call creates a new one.
     */
    private void uploadMetricsViaNimbusClient(WorkerUploadMetrics metrics) {
        LOG.debug("emit metrics through nimbus client.");
        TopologyMetric tpMetric = MetricUtils.mkTopologyMetric();
        tpMetric.set_workerMetric(metrics.get_allMetrics());

        try {
            if (client == null) {
                LOG.warn("nimbus client is null...");
                client = new NimbusClientWrapper();
                client.init(conf);
            }
            client.getClient().uploadTopologyMetrics(topologyId, tpMetric);
        } catch (Throwable ex) {
            LOG.error("upload metrics error:", ex);
            if (client != null) {
                client.cleanup();
                client = null;
            }
        }
    }

    /**
     * Stores the collector used to reach the topology master. Objects that are neither an
     * {@link OutputCollector} nor a {@link SpoutOutputCollector} are ignored.
     *
     * @param outputCollector a bolt or spout output collector
     */
    public void setOutputCollector(Object outputCollector) {
        if (outputCollector instanceof OutputCollector) {
            this.boltOutput = (OutputCollector) outputCollector;
        } else if (outputCollector instanceof SpoutOutputCollector) {
            this.spoutOutput = (SpoutOutputCollector) outputCollector;
        }
    }

    /**
     * Applies metric-related settings from {@code newConf} to the static metric state: debug flag and
     * debug metric names, accurate-calculation switch, per-metric enable/disable lists, histogram update
     * interval, stream-metrics switch and the global metrics switch.
     *
     * @param newConf the configuration to read from; a {@code null} map is ignored
     */
    public void updateMetricConfig(Map newConf) {
        if (newConf == null) {
            LOG.warn("metric config is null, skip updating metric config");
            return;
        }

        // Read from the configuration being applied, not from the one captured at construction,
        // so that refreshed debug settings actually take effect.
        JStormMetrics.setDebug(ConfigExtension.isEnableMetricDebug(newConf));
        JStormMetrics.addDebugMetrics(ConfigExtension.getDebugMetricNames(newConf));

        //update metric accurate calculate
        boolean accurateMetric = ConfigExtension.getTopologyAccurateMetric(newConf);
        if (MetricUtils.metricAccurateCal != accurateMetric) {
            MetricUtils.metricAccurateCal = accurateMetric;
            LOG.info("switch topology metric accurate enable to {}", MetricUtils.metricAccurateCal);
        }

        // update enabled/disabled metrics
        String enabledMetrics = ConfigExtension.getEnabledMetricNames(newConf);
        String disabledMetrics = ConfigExtension.getDisabledMetricNames(newConf);
        if (enabledMetrics != null || disabledMetrics != null) {
            applyEnabledDisabledMetrics(toSet(enabledMetrics, ","), toSet(disabledMetrics, ","));
        }

        long updateInterval = ConfigExtension.getTimerUpdateInterval(newConf);
        if (updateInterval != AsmHistogram.getUpdateInterval()) {
            AsmHistogram.setUpdateInterval(updateInterval);
        }

        boolean enableStreamMetrics = ConfigExtension.isEnableStreamMetrics(newConf);
        if (enableStreamMetrics != JStormMetrics.enableStreamMetrics) {
            JStormMetrics.enableStreamMetrics = enableStreamMetrics;
            LOG.info("switch topology stream metric enable to {}", enableStreamMetrics);
        }

        boolean enableMetrics = ConfigExtension.isEnableMetrics(newConf);
        if (enableMetrics != JStormMetrics.enabled) {
            JStormMetrics.enabled = enableMetrics;
            LOG.info("switch topology metric enable to {}", enableMetrics);
        }
    }

    /**
     * Enables or disables metrics by short name across the topology, component, task, stream, netty and
     * worker registries. A name present in both sets ends up enabled; names in neither set are untouched.
     */
    private void applyEnabledDisabledMetrics(Set<String> enabledNames, Set<String> disabledNames) {
        AsmMetricRegistry[] registries = new AsmMetricRegistry[]{
                JStormMetrics.getTopologyMetrics(),
                JStormMetrics.getComponentMetrics(),
                JStormMetrics.getTaskMetrics(),
                JStormMetrics.getStreamMetrics(),
                JStormMetrics.getNettyMetrics(),
                JStormMetrics.getWorkerMetrics()
        };
        for (AsmMetricRegistry registry : registries) {
            Collection<AsmMetric> metrics = registry.getMetrics().values();
            for (AsmMetric metric : metrics) {
                String shortMetricName = metric.getShortName();
                if (enabledNames.contains(shortMetricName)) {
                    metric.setEnabled(true);
                } else if (disabledNames.contains(shortMetricName)) {
                    metric.setEnabled(false);
                }
            }
        }
    }

    /**
     * Splits {@code items} on {@code delim} (interpreted as a regular expression by
     * {@link String#split(String)}) and returns the trimmed, non-blank parts.
     *
     * @return a new mutable set; empty when {@code items} is blank or {@code null}
     */
    private static Set<String> toSet(String items, String delim) {
        Set<String> ret = new HashSet<>();
        if (StringUtils.isBlank(items)) {
            return ret;
        }
        for (String item : items.split(delim)) {
            String trimmed = item.trim();
            if (!StringUtils.isBlank(trimmed)) {
                ret.add(trimmed);
            }
        }
        return ret;
    }

    /** Returns whether the metric's op flags include {@code AsmMetric.MetricOp.REPORT}. */
    private static boolean isReportable(AsmMetric metric) {
        return (metric.getOp() & AsmMetric.MetricOp.REPORT) == AsmMetric.MetricOp.REPORT;
    }

    /**
     * {@link Refreshable} callback: re-applies the metric configuration from the given map.
     */
    @Override
    public void refresh(Map conf) {
        updateMetricConfig(conf);
    }

    /**
     * A thread which flushes metrics data on aligned time, and sends metrics data to:
     * 1. nimbus via nimbus client if this JStormMetricsReporter instance is not in a topology worker
     * 2. topology master via netty if it's in a topology worker
     */
    class FlushMetricThread extends RunnableCallback {
        @Override
        public void run() {
            if (!JStormMetrics.enabled || !TimeUtils.isTimeAligned()) {
                return;
            }

            int cnt = 0;
            try {
                for (AsmMetricRegistry registry : JStormMetrics.allRegistries) {
                    for (AsmMetric metric : registry.getMetrics().values()) {
                        metric.flush();
                        cnt++;
                    }
                }
                LOG.debug("flush metrics, total:{}.", cnt);

                uploadMetricData();
            } catch (Exception ex) {
                LOG.error("Error", ex);
            }
        }

        @Override
        public Object getResult() {
            return flushMetricThreadCycle;
        }
    }

    /**
     * A thread which checks metric meta every checkMetaThreadCycle seconds, and tries to:
     * 1. register metrics via nimbus client if it's not in a topology worker
     * 2. register metrics to topology master if it's in a topology worker
     */
    class CheckMetricMetaThread extends RunnableCallback {
        /** Guards against overlapping runs. */
        private volatile boolean processing = false;
        private final long start = TimeUtils.current_time_secs();
        /** Randomised start-up delay of 15..29 seconds before the first check. */
        private final long initialDelay = 15 + new Random().nextInt(15);

        @Override
        public void run() {
            if (!JStormMetrics.enabled || TimeUtils.current_time_secs() - start < initialDelay) {
                return;
            }
            if (processing) {
                LOG.debug("still processing, skip...");
                return;
            }

            processing = true;
            try {
                Set<String> names = collectNamesToRegister();
                if (names.isEmpty()) {
                    return;
                }

                LOG.debug("register metrics, size:{}", names.size());
                if (!inTopology) {
                    // register via nimbus client, for supervisors/nimbus servers
                    updateMetricMeta(registerMetrics(names));
                } else {
                    // register via TM, for topologies
                    emitRegisterRequest(names);
                }
            } catch (Throwable ex) {
                LOG.error("Error", ex);
            } finally {
                // Always release the guard, whatever happened above.
                processing = false;
            }
        }

        /**
         * Collects names of reportable metrics that do not have a valid metric id yet. Outside a
         * topology worker, all reportable worker metrics are added as well, regardless of their id.
         */
        private Set<String> collectNamesToRegister() {
            Set<String> names = new HashSet<>();
            for (AsmMetricRegistry registry : JStormMetrics.allRegistries) {
                Map<String, AsmMetric> metricMap = registry.getMetrics();
                for (Map.Entry<String, AsmMetric> metricEntry : metricMap.entrySet()) {
                    AsmMetric metric = metricEntry.getValue();
                    if (isReportable(metric) && !MetricUtils.isValidId(metric.getMetricId())) {
                        names.add(metricEntry.getKey());
                    }
                }
            }

            // when in nimbus/supervisor, force to check worker metrics(CpuUsedRatio, DiskUsage, etc) again
            // TODO: ideally, this should only happen in a short period when nimbus restarts lately
            if (!inTopology) {
                for (Map.Entry<String, AsmMetric> metricEntry
                        : JStormMetrics.workerMetrics.getMetrics().entrySet()) {
                    if (isReportable(metricEntry.getValue())) {
                        names.add(metricEntry.getKey());
                    }
                }
            }
            return names;
        }

        /** Emits the register-metrics request to the topology master via the spout or bolt collector. */
        private void emitRegisterRequest(Set<String> names) {
            if (spoutOutput != null) {
                ((SpoutCollector) spoutOutput.getDelegate()).emitCtrl(
                        Common.TOPOLOGY_MASTER_REGISTER_METRICS_STREAM_ID, new Values(names), null);
            } else if (boltOutput != null) {
                ((BoltCollector) boltOutput.getDelegate()).emitCtrl(
                        Common.TOPOLOGY_MASTER_REGISTER_METRICS_STREAM_ID, null, new Values(names));
            } else {
                LOG.warn("topology:{}, both spout and bolt collectors are null, don't know what to do...", topologyId);
            }
        }

        @Override
        public Object getResult() {
            return checkMetaThreadCycle;
        }
    }

    /**
     * Register metric meta callback. Called in SpoutExecutors/BoltExecutors within topology workers.
     *
     * JStormMetricsReporter first sends a TOPOLOGY_MASTER_REGISTER_METRICS_STREAM_ID stream to TM to register metrics,
     * on success TM will return a TOPOLOGY_MASTER_REGISTER_METRICS_RESP_STREAM_ID stream which contains
     * registered metric meta and then call this method to update local meta.
     * <p/>
     * The check-meta thread of this class also calls it with the ids obtained through the nimbus client.
     *
     * @param nameIdMap metric name to metric id; a {@code null} map is ignored, as are entries with a
     *                  {@code null} id or a name for which no local metric is found
     */
    public void updateMetricMeta(Map<String, Long> nameIdMap) {
        if (nameIdMap == null) {
            return;
        }
        for (Map.Entry<String, Long> entry : nameIdMap.entrySet()) {
            String name = entry.getKey();
            Long id = entry.getValue();
            if (id == null) {
                // Skip rather than fail on unboxing; remaining entries are still applied.
                continue;
            }
            AsmMetric metric = JStormMetrics.find(name);
            if (metric != null) {
                metric.setMetricId(id);
                LOG.debug("set metric id, {}:{}", name, id);
            }
        }
    }
}