/**
* Copyright 2016 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
package com.github.ambry.replication;
import com.codahale.metrics.Counter;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
import com.github.ambry.clustermap.DataNodeId;
import com.github.ambry.clustermap.PartitionId;
import com.github.ambry.clustermap.ReplicaId;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Metrics for replication.
 *
 * <p>Every metric is created in (or registered with) the {@link MetricRegistry} handed to the constructor.
 * Intra-colo metrics are single instances created by the constructor. Inter-colo metrics are kept in maps keyed
 * by datacenter name; those maps are empty until {@link #populatePerColoMetrics(Set)} is called.
 *
 * <p>The maps are plain {@link HashMap}s and this class performs no synchronization of its own.
 */
public class ReplicationMetrics {
  // Inter-colo entries below are keyed by datacenter name and filled in by populatePerColoMetrics().

  // Replicated bytes rate.
  public final Map<String, Meter> interColoReplicationBytesRate = new HashMap<String, Meter>();
  public final Meter intraColoReplicationBytesRate;
  public final Map<String, Meter> plainTextInterColoReplicationBytesRate = new HashMap<String, Meter>();
  public final Meter plainTextIntraColoReplicationBytesRate;
  public final Map<String, Meter> sslInterColoReplicationBytesRate = new HashMap<String, Meter>();
  public final Meter sslIntraColoReplicationBytesRate;

  // Metadata exchange and replicated blob counts.
  public final Map<String, Counter> interColoMetadataExchangeCount = new HashMap<String, Counter>();
  public final Counter intraColoMetadataExchangeCount;
  public final Map<String, Counter> interColoBlobsReplicatedCount = new HashMap<String, Counter>();
  public final Counter intraColoBlobsReplicatedCount;
  public final Counter unknownRemoteReplicaRequestCount;
  public final Map<String, Counter> plainTextInterColoMetadataExchangeCount = new HashMap<String, Counter>();
  public final Counter plainTextIntraColoMetadataExchangeCount;
  public final Map<String, Counter> plainTextInterColoBlobsReplicatedCount = new HashMap<String, Counter>();
  public final Counter plainTextIntraColoBlobsReplicatedCount;
  public final Map<String, Counter> sslInterColoMetadataExchangeCount = new HashMap<String, Counter>();
  public final Counter sslIntraColoMetadataExchangeCount;
  public final Map<String, Counter> sslInterColoBlobsReplicatedCount = new HashMap<String, Counter>();
  public final Counter sslIntraColoBlobsReplicatedCount;

  // Error counts.
  public final Counter replicationErrors;
  public final Counter plainTextReplicationErrors;
  public final Counter sslReplicationErrors;
  public final Counter replicationTokenResetCount;
  public final Counter replicationInvalidMessageStreamErrorCount;

  // Replication latency timers.
  public final Map<String, Timer> interColoReplicationLatency = new HashMap<String, Timer>();
  public final Timer intraColoReplicationLatency;
  public final Map<String, Timer> plainTextInterColoReplicationLatency = new HashMap<String, Timer>();
  public final Timer plainTextIntraColoReplicationLatency;
  public final Map<String, Timer> sslInterColoReplicationLatency = new HashMap<String, Timer>();
  public final Timer sslIntraColoReplicationLatency;

  // Replica token persistence.
  public final Histogram remoteReplicaTokensPersistTime;
  public final Histogram remoteReplicaTokensRestoreTime;

  // Per-phase time histograms.
  public final Map<String, Histogram> interColoExchangeMetadataTime = new HashMap<String, Histogram>();
  public final Histogram intraColoExchangeMetadataTime;
  public final Map<String, Histogram> plainTextInterColoExchangeMetadataTime = new HashMap<String, Histogram>();
  public final Histogram plainTextIntraColoExchangeMetadataTime;
  public final Map<String, Histogram> sslInterColoExchangeMetadataTime = new HashMap<String, Histogram>();
  public final Histogram sslIntraColoExchangeMetadataTime;
  public final Map<String, Histogram> interColoFixMissingKeysTime = new HashMap<String, Histogram>();
  public final Histogram intraColoFixMissingKeysTime;
  public final Map<String, Histogram> plainTextInterColoFixMissingKeysTime = new HashMap<String, Histogram>();
  public final Histogram plainTextIntraColoFixMissingKeysTime;
  public final Map<String, Histogram> sslInterColoFixMissingKeysTime = new HashMap<String, Histogram>();
  public final Histogram sslIntraColoFixMissingKeysTime;
  public final Map<String, Histogram> interColoReplicationMetadataRequestTime = new HashMap<String, Histogram>();
  public final Histogram intraColoReplicationMetadataRequestTime;
  public final Map<String, Histogram> plainTextInterColoReplicationMetadataRequestTime =
      new HashMap<String, Histogram>();
  public final Histogram plainTextIntraColoReplicationMetadataRequestTime;
  public final Map<String, Histogram> sslInterColoReplicationMetadataRequestTime = new HashMap<String, Histogram>();
  public final Histogram sslIntraColoReplicationMetadataRequestTime;
  public final Histogram intraColoReplicationWaitTime;
  public final Map<String, Histogram> interColoCheckMissingKeysTime = new HashMap<String, Histogram>();
  public final Histogram intraColoCheckMissingKeysTime;
  public final Map<String, Histogram> interColoProcessMetadataResponseTime = new HashMap<String, Histogram>();
  public final Histogram intraColoProcessMetadataResponseTime;
  public final Map<String, Histogram> interColoGetRequestTime = new HashMap<String, Histogram>();
  public final Histogram intraColoGetRequestTime;
  public final Map<String, Histogram> plainTextInterColoGetRequestTime = new HashMap<String, Histogram>();
  public final Histogram plainTextIntraColoGetRequestTime;
  public final Map<String, Histogram> sslInterColoGetRequestTime = new HashMap<String, Histogram>();
  public final Histogram sslIntraColoGetRequestTime;
  public final Map<String, Histogram> interColoBatchStoreWriteTime = new HashMap<String, Histogram>();
  public final Histogram intraColoBatchStoreWriteTime;
  public final Map<String, Histogram> plainTextInterColoBatchStoreWriteTime = new HashMap<String, Histogram>();
  public final Histogram plainTextIntraColoBatchStoreWriteTime;
  public final Map<String, Histogram> sslInterColoBatchStoreWriteTime = new HashMap<String, Histogram>();
  public final Histogram sslIntraColoBatchStoreWriteTime;
  public final Map<String, Histogram> interColoTotalReplicationTime = new HashMap<String, Histogram>();
  public final Histogram intraColoTotalReplicationTime;
  public final Map<String, Histogram> plainTextInterColoTotalReplicationTime = new HashMap<String, Histogram>();
  public final Histogram plainTextIntraColoTotalReplicationTime;
  public final Map<String, Histogram> sslInterColoTotalReplicationTime = new HashMap<String, Histogram>();
  public final Histogram sslIntraColoTotalReplicationTime;

  // Lag gauges added through addRemoteReplicaToLagMetrics().
  public List<Gauge<Long>> replicaLagInBytes = new ArrayList<Gauge<Long>>();

  private final MetricRegistry registry;
  // Per remote replica error counters, keyed by the full metric name ("host-port-partition-<suffix>").
  private final Map<String, Counter> metadataRequestErrorMap = new HashMap<String, Counter>();
  private final Map<String, Counter> getRequestErrorMap = new HashMap<String, Counter>();
  private final Map<String, Counter> localStoreErrorMap = new HashMap<String, Counter>();
  private final Map<PartitionId, Counter> partitionIdToInvalidMessageStreamErrorCounter =
      new HashMap<PartitionId, Counter>();

  /**
   * Creates all intra-colo metrics and the per-partition corruption error counters.
   *
   * @param registry the {@link MetricRegistry} that every metric of this class is created in
   * @param replicaIds the replicas whose partitions get a "-CorruptionErrorCount" counter
   */
  public ReplicationMetrics(MetricRegistry registry, List<? extends ReplicaId> replicaIds) {
    // Assigned first because the private registration helpers below read this field.
    this.registry = registry;

    intraColoReplicationBytesRate = replicaThreadMeter("IntraColoReplicationBytesRate");
    plainTextIntraColoReplicationBytesRate = replicaThreadMeter("PlainTextIntraColoReplicationBytesRate");
    sslIntraColoReplicationBytesRate = replicaThreadMeter("SslIntraColoReplicationBytesRate");

    intraColoMetadataExchangeCount = replicaThreadCounter("IntraColoMetadataExchangeCount");
    intraColoBlobsReplicatedCount = replicaThreadCounter("IntraColoBlobsReplicatedCount");
    unknownRemoteReplicaRequestCount = replicaThreadCounter("UnknownRemoteReplicaRequestCount");
    plainTextIntraColoMetadataExchangeCount = replicaThreadCounter("PlainTextIntraColoMetadataExchangeCount");
    plainTextIntraColoBlobsReplicatedCount = replicaThreadCounter("PlainTextIntraColoBlobsReplicatedCount");
    sslIntraColoMetadataExchangeCount = replicaThreadCounter("SslIntraColoMetadataExchangeCount");
    sslIntraColoBlobsReplicatedCount = replicaThreadCounter("SslIntraColoBlobsReplicatedCount");

    replicationErrors = replicaThreadCounter("ReplicationErrors");
    plainTextReplicationErrors = replicaThreadCounter("PlainTextReplicationErrors");
    sslReplicationErrors = replicaThreadCounter("SslReplicationErrors");
    replicationTokenResetCount = replicaThreadCounter("ReplicationTokenResetCount");
    replicationInvalidMessageStreamErrorCount = replicaThreadCounter("ReplicationInvalidMessageStreamErrorCount");

    intraColoReplicationLatency = replicaThreadTimer("IntraColoReplicationLatency");
    plainTextIntraColoReplicationLatency = replicaThreadTimer("PlainTextIntraColoReplicationLatency");
    sslIntraColoReplicationLatency = replicaThreadTimer("SslIntraColoReplicationLatency");

    remoteReplicaTokensPersistTime = replicaThreadHistogram("RemoteReplicaTokensPersistTime");
    remoteReplicaTokensRestoreTime = replicaThreadHistogram("RemoteReplicaTokensRestoreTime");

    intraColoExchangeMetadataTime = replicaThreadHistogram("IntraColoExchangeMetadataTime");
    plainTextIntraColoExchangeMetadataTime = replicaThreadHistogram("PlainTextIntraColoExchangeMetadataTime");
    sslIntraColoExchangeMetadataTime = replicaThreadHistogram("SslIntraColoExchangeMetadataTime");

    intraColoFixMissingKeysTime = replicaThreadHistogram("IntraColoFixMissingKeysTime");
    plainTextIntraColoFixMissingKeysTime = replicaThreadHistogram("PlainTextIntraColoFixMissingKeysTime");
    sslIntraColoFixMissingKeysTime = replicaThreadHistogram("SslIntraColoFixMissingKeysTime");

    intraColoReplicationWaitTime = replicaThreadHistogram("IntraColoReplicationWaitTime");

    intraColoReplicationMetadataRequestTime = replicaThreadHistogram("IntraColoReplicationMetadataRequestTime");
    plainTextIntraColoReplicationMetadataRequestTime =
        replicaThreadHistogram("PlainTextIntraColoReplicationMetadataRequestTime");
    sslIntraColoReplicationMetadataRequestTime =
        replicaThreadHistogram("SslIntraColoReplicationMetadataRequestTime");

    intraColoCheckMissingKeysTime = replicaThreadHistogram("IntraColoCheckMissingKeysTime");
    intraColoProcessMetadataResponseTime = replicaThreadHistogram("IntraColoProcessMetadataResponseTime");

    intraColoGetRequestTime = replicaThreadHistogram("IntraColoGetRequestTime");
    plainTextIntraColoGetRequestTime = replicaThreadHistogram("PlainTextIntraColoGetRequestTime");
    sslIntraColoGetRequestTime = replicaThreadHistogram("SslIntraColoGetRequestTime");

    intraColoBatchStoreWriteTime = replicaThreadHistogram("IntraColoBatchStoreWriteTime");
    plainTextIntraColoBatchStoreWriteTime = replicaThreadHistogram("PlainTextIntraColoBatchStoreWriteTime");
    sslIntraColoBatchStoreWriteTime = replicaThreadHistogram("SslIntraColoBatchStoreWriteTime");

    intraColoTotalReplicationTime = replicaThreadHistogram("IntraColoTotalReplicationTime");
    plainTextIntraColoTotalReplicationTime = replicaThreadHistogram("PlainTextIntraColoTotalReplicationTime");
    sslIntraColoTotalReplicationTime = replicaThreadHistogram("SslIntraColoTotalReplicationTime");

    populateInvalidMessageMetricForReplicas(replicaIds);
  }

  /**
   * Creates the inter-colo metrics of every given datacenter and stores them in the per-datacenter maps.
   * Metric names follow the pattern {@code <Inter|PlainTextInter|SslInter>-<datacenter>-<MetricName>}.
   *
   * @param datacenters names of the datacenters to replicate from
   */
  public void populatePerColoMetrics(Set<String> datacenters) {
    for (String datacenter : datacenters) {
      String inter = "Inter-" + datacenter + "-";
      String plainTextInter = "PlainTextInter-" + datacenter + "-";
      String sslInter = "SslInter-" + datacenter + "-";

      interColoReplicationBytesRate.put(datacenter, replicaThreadMeter(inter + "ReplicationBytesRate"));
      plainTextInterColoReplicationBytesRate.put(datacenter,
          replicaThreadMeter(plainTextInter + "ReplicationBytesRate"));
      sslInterColoReplicationBytesRate.put(datacenter, replicaThreadMeter(sslInter + "ReplicationBytesRate"));

      interColoMetadataExchangeCount.put(datacenter, replicaThreadCounter(inter + "MetadataExchangeCount"));
      // NOTE(review): this suffix ("ReplicationBlobsCount") differs from the "BlobsReplicatedCount" used by the
      // plain text and SSL variants. It is kept as is so the published metric name does not change.
      interColoBlobsReplicatedCount.put(datacenter, replicaThreadCounter(inter + "ReplicationBlobsCount"));
      plainTextInterColoMetadataExchangeCount.put(datacenter,
          replicaThreadCounter(plainTextInter + "MetadataExchangeCount"));
      plainTextInterColoBlobsReplicatedCount.put(datacenter,
          replicaThreadCounter(plainTextInter + "BlobsReplicatedCount"));
      sslInterColoMetadataExchangeCount.put(datacenter, replicaThreadCounter(sslInter + "MetadataExchangeCount"));
      sslInterColoBlobsReplicatedCount.put(datacenter, replicaThreadCounter(sslInter + "BlobsReplicatedCount"));

      interColoReplicationLatency.put(datacenter, replicaThreadTimer(inter + "ReplicationLatency"));
      plainTextInterColoReplicationLatency.put(datacenter, replicaThreadTimer(plainTextInter + "ReplicationLatency"));
      sslInterColoReplicationLatency.put(datacenter, replicaThreadTimer(sslInter + "ReplicationLatency"));

      interColoExchangeMetadataTime.put(datacenter, replicaThreadHistogram(inter + "ExchangeMetadataTime"));
      plainTextInterColoExchangeMetadataTime.put(datacenter,
          replicaThreadHistogram(plainTextInter + "ExchangeMetadataTime"));
      sslInterColoExchangeMetadataTime.put(datacenter, replicaThreadHistogram(sslInter + "ExchangeMetadataTime"));

      interColoFixMissingKeysTime.put(datacenter, replicaThreadHistogram(inter + "FixMissingKeysTime"));
      plainTextInterColoFixMissingKeysTime.put(datacenter,
          replicaThreadHistogram(plainTextInter + "FixMissingKeysTime"));
      sslInterColoFixMissingKeysTime.put(datacenter, replicaThreadHistogram(sslInter + "FixMissingKeysTime"));

      interColoReplicationMetadataRequestTime.put(datacenter,
          replicaThreadHistogram(inter + "ReplicationMetadataRequestTime"));
      plainTextInterColoReplicationMetadataRequestTime.put(datacenter,
          replicaThreadHistogram(plainTextInter + "ReplicationMetadataRequestTime"));
      sslInterColoReplicationMetadataRequestTime.put(datacenter,
          replicaThreadHistogram(sslInter + "ReplicationMetadataRequestTime"));

      interColoCheckMissingKeysTime.put(datacenter, replicaThreadHistogram(inter + "CheckMissingKeysTime"));
      interColoProcessMetadataResponseTime.put(datacenter,
          replicaThreadHistogram(inter + "ProcessMetadataResponseTime"));

      interColoGetRequestTime.put(datacenter, replicaThreadHistogram(inter + "GetRequestTime"));
      plainTextInterColoGetRequestTime.put(datacenter, replicaThreadHistogram(plainTextInter + "GetRequestTime"));
      sslInterColoGetRequestTime.put(datacenter, replicaThreadHistogram(sslInter + "GetRequestTime"));

      interColoBatchStoreWriteTime.put(datacenter, replicaThreadHistogram(inter + "BatchStoreWriteTime"));
      plainTextInterColoBatchStoreWriteTime.put(datacenter,
          replicaThreadHistogram(plainTextInter + "BatchStoreWriteTime"));
      sslInterColoBatchStoreWriteTime.put(datacenter, replicaThreadHistogram(sslInter + "BatchStoreWriteTime"));

      interColoTotalReplicationTime.put(datacenter, replicaThreadHistogram(inter + "TotalReplicationTime"));
      plainTextInterColoTotalReplicationTime.put(datacenter,
          replicaThreadHistogram(plainTextInter + "TotalReplicationTime"));
      sslInterColoTotalReplicationTime.put(datacenter, replicaThreadHistogram(sslInter + "TotalReplicationTime"));
    }
  }

  /**
   * Register metrics for measuring the number of active intra and inter colo replica threads.
   *
   * @param replicaThreadPools A map of datacenter names to {@link ReplicaThread}s handling replication from that
   *                           datacenter
   * @param localDatacenter The datacenter on which the {@link ReplicationManager} is running
   */
  void trackLiveThreadsCount(final Map<String, ArrayList<ReplicaThread>> replicaThreadPools, String localDatacenter) {
    for (final String datacenter : replicaThreadPools.keySet()) {
      Gauge<Integer> liveThreadsPerDatacenter = new Gauge<Integer>() {
        @Override
        public Integer getValue() {
          // The pool is looked up on every read, so the gauge follows the map's current contents.
          return getLiveThreads(replicaThreadPools.get(datacenter));
        }
      };
      String gaugeName = localDatacenter.equals(datacenter) ? "NumberOfIntra-Colo-ReplicaThreads"
          : "NumberOfInter-" + datacenter + "-ReplicaThreads";
      registry.register(MetricRegistry.name(ReplicaThread.class, gaugeName), liveThreadsPerDatacenter);
    }
  }

  /**
   * Counts the threads for which {@link ReplicaThread#isThreadUp()} returns {@code true}.
   *
   * @param replicaThreads the threads to inspect
   * @return the number of threads reporting themselves as up
   */
  private int getLiveThreads(List<ReplicaThread> replicaThreads) {
    int liveThreads = 0;
    for (ReplicaThread replicaThread : replicaThreads) {
      if (replicaThread.isThreadUp()) {
        liveThreads++;
      }
    }
    return liveThreads;
  }

  /**
   * Registers a gauge named {@code <host>-<port>-<partition>-replicaLagInBytes} that reports
   * {@link RemoteReplicaInfo#getRemoteLagFromLocalInBytes()} and adds it to {@link #replicaLagInBytes}.
   *
   * @param remoteReplicaInfo the remote replica whose lag is reported
   */
  public void addRemoteReplicaToLagMetrics(final RemoteReplicaInfo remoteReplicaInfo) {
    final String metricName = remoteReplicaMetricPrefix(remoteReplicaInfo.getReplicaId()) + "replicaLagInBytes";
    Gauge<Long> replicaLag = new Gauge<Long>() {
      @Override
      public Long getValue() {
        return remoteReplicaInfo.getRemoteLagFromLocalInBytes();
      }
    };
    // Unlike the other metrics of this class, the lag gauge is named after ReplicationMetrics itself.
    // NOTE(review): register() is expected to reject a name that is already registered - confirm before calling
    // this twice for the same replica.
    registry.register(MetricRegistry.name(ReplicationMetrics.class, metricName), replicaLag);
    replicaLagInBytes.add(replicaLag);
  }

  /**
   * Creates a {@code <partition>-CorruptionErrorCount} counter for the partition of every given replica.
   * A partition that already has a counter is skipped.
   *
   * @param replicaIds the replicas whose partitions need a corruption error counter
   */
  public void populateInvalidMessageMetricForReplicas(List<? extends ReplicaId> replicaIds) {
    for (ReplicaId replicaId : replicaIds) {
      PartitionId partitionId = replicaId.getPartitionId();
      if (partitionIdToInvalidMessageStreamErrorCounter.get(partitionId) == null) {
        partitionIdToInvalidMessageStreamErrorCounter.put(partitionId,
            replicaThreadCounter(partitionId + "-CorruptionErrorCount"));
      }
    }
  }

  /**
   * Increments the overall invalid message stream error count and, when the partition has one, its
   * per-partition corruption error counter.
   *
   * @param partitionId the partition on which the invalid message stream was seen
   */
  public void incrementInvalidMessageError(PartitionId partitionId) {
    replicationInvalidMessageStreamErrorCount.inc();
    Counter partitionCounter = partitionIdToInvalidMessageStreamErrorCounter.get(partitionId);
    if (partitionCounter != null) {
      partitionCounter.inc();
    }
  }

  /**
   * Creates the metadata request, get request and local store error counters of a remote replica. The counters
   * are named {@code <host>-<port>-<partition>-<metadataRequestError|getRequestError|localStoreError>}.
   *
   * @param remoteReplicaInfo the remote replica to create the error counters for
   */
  public void createRemoteReplicaErrorMetrics(RemoteReplicaInfo remoteReplicaInfo) {
    String prefix = remoteReplicaMetricPrefix(remoteReplicaInfo.getReplicaId());

    String metadataRequestErrorMetricName = prefix + "metadataRequestError";
    metadataRequestErrorMap.put(metadataRequestErrorMetricName, replicaThreadCounter(metadataRequestErrorMetricName));

    String getRequestErrorMetricName = prefix + "getRequestError";
    getRequestErrorMap.put(getRequestErrorMetricName, replicaThreadCounter(getRequestErrorMetricName));

    String localStoreErrorMetricName = prefix + "localStoreError";
    localStoreErrorMap.put(localStoreErrorMetricName, replicaThreadCounter(localStoreErrorMetricName));
  }

  /**
   * Increments the metadata request error counter of a remote replica. Does nothing when no counter was created
   * for the replica through {@link #createRemoteReplicaErrorMetrics(RemoteReplicaInfo)}.
   *
   * @param remoteReplica the remote replica the failed metadata request was sent to
   */
  public void updateMetadataRequestError(ReplicaId remoteReplica) {
    incrementIfPresent(metadataRequestErrorMap, remoteReplicaMetricPrefix(remoteReplica) + "metadataRequestError");
  }

  /**
   * Increments the get request error counter of a remote replica. Does nothing when no counter was created for
   * the replica through {@link #createRemoteReplicaErrorMetrics(RemoteReplicaInfo)}.
   *
   * @param remoteReplica the remote replica the failed get request was sent to
   */
  public void updateGetRequestError(ReplicaId remoteReplica) {
    incrementIfPresent(getRequestErrorMap, remoteReplicaMetricPrefix(remoteReplica) + "getRequestError");
  }

  /**
   * Increments the local store error counter of a remote replica. Does nothing when no counter was created for
   * the replica through {@link #createRemoteReplicaErrorMetrics(RemoteReplicaInfo)}.
   *
   * @param remoteReplica the remote replica being replicated from when the local store error happened
   */
  public void updateLocalStoreError(ReplicaId remoteReplica) {
    incrementIfPresent(localStoreErrorMap, remoteReplicaMetricPrefix(remoteReplica) + "localStoreError");
  }

  /**
   * Increments the overall replication error count and the SSL or plain text specific one.
   *
   * @param sslEnabled {@code true} to count the error as an SSL error, {@code false} for plain text
   */
  public void incrementReplicationErrors(boolean sslEnabled) {
    replicationErrors.inc();
    Counter perTransportErrors = sslEnabled ? sslReplicationErrors : plainTextReplicationErrors;
    perTransportErrors.inc();
  }

  /**
   * Records a total replication time in the overall and the SSL or plain text specific histogram.
   *
   * @param totalReplicationTime the value to record
   * @param remoteColo {@code true} to record against the inter-colo histograms of {@code datacenter},
   *                   {@code false} to record against the intra-colo histograms
   * @param sslEnabled {@code true} to record against the SSL histogram, {@code false} for plain text
   * @param datacenter the remote datacenter name; only read when {@code remoteColo} is {@code true}
   * @throws NullPointerException if {@code remoteColo} is {@code true} and {@code datacenter} was not registered
   *                              through {@link #populatePerColoMetrics(Set)}
   */
  public void updateTotalReplicationTime(long totalReplicationTime, boolean remoteColo, boolean sslEnabled,
      String datacenter) {
    if (remoteColo) {
      interColoTotalReplicationTime.get(datacenter).update(totalReplicationTime);
      Map<String, Histogram> perTransport =
          sslEnabled ? sslInterColoTotalReplicationTime : plainTextInterColoTotalReplicationTime;
      perTransport.get(datacenter).update(totalReplicationTime);
    } else {
      intraColoTotalReplicationTime.update(totalReplicationTime);
      Histogram perTransport =
          sslEnabled ? sslIntraColoTotalReplicationTime : plainTextIntraColoTotalReplicationTime;
      perTransport.update(totalReplicationTime);
    }
  }

  /**
   * Counts one metadata exchange and records the time it took, in the overall and the SSL or plain text
   * specific metrics.
   *
   * @param exchangeMetadataTime the value to record
   * @param remoteColo {@code true} to record against the inter-colo metrics of {@code datacenter},
   *                   {@code false} to record against the intra-colo metrics
   * @param sslEnabled {@code true} to record against the SSL metrics, {@code false} for plain text
   * @param datacenter the remote datacenter name; only read when {@code remoteColo} is {@code true}
   * @throws NullPointerException if {@code remoteColo} is {@code true} and {@code datacenter} was not registered
   *                              through {@link #populatePerColoMetrics(Set)}
   */
  public void updateExchangeMetadataTime(long exchangeMetadataTime, boolean remoteColo, boolean sslEnabled,
      String datacenter) {
    if (remoteColo) {
      interColoMetadataExchangeCount.get(datacenter).inc();
      interColoExchangeMetadataTime.get(datacenter).update(exchangeMetadataTime);
      Map<String, Counter> perTransportCount =
          sslEnabled ? sslInterColoMetadataExchangeCount : plainTextInterColoMetadataExchangeCount;
      Map<String, Histogram> perTransportTime =
          sslEnabled ? sslInterColoExchangeMetadataTime : plainTextInterColoExchangeMetadataTime;
      perTransportCount.get(datacenter).inc();
      perTransportTime.get(datacenter).update(exchangeMetadataTime);
    } else {
      intraColoMetadataExchangeCount.inc();
      intraColoExchangeMetadataTime.update(exchangeMetadataTime);
      Counter perTransportCount =
          sslEnabled ? sslIntraColoMetadataExchangeCount : plainTextIntraColoMetadataExchangeCount;
      Histogram perTransportTime =
          sslEnabled ? sslIntraColoExchangeMetadataTime : plainTextIntraColoExchangeMetadataTime;
      perTransportCount.inc();
      perTransportTime.update(exchangeMetadataTime);
    }
  }

  /**
   * Records the time taken to check for missing keys.
   *
   * @param checkMissingKeyTime the value to record
   * @param remoteColo {@code true} to record against the inter-colo histogram of {@code datacenterName},
   *                   {@code false} to record against the intra-colo histogram
   * @param datacenterName the remote datacenter name; only read when {@code remoteColo} is {@code true}
   * @throws NullPointerException if {@code remoteColo} is {@code true} and {@code datacenterName} was not
   *                              registered through {@link #populatePerColoMetrics(Set)}
   */
  public void updateCheckMissingKeysTime(long checkMissingKeyTime, boolean remoteColo, String datacenterName) {
    Histogram target =
        remoteColo ? interColoCheckMissingKeysTime.get(datacenterName) : intraColoCheckMissingKeysTime;
    target.update(checkMissingKeyTime);
  }

  /**
   * Records the time taken to fix missing store keys in the overall and the SSL or plain text specific
   * histogram.
   *
   * @param fixMissingStoreKeysTime the value to record
   * @param remoteColo {@code true} to record against the inter-colo histograms of {@code datacenter},
   *                   {@code false} to record against the intra-colo histograms
   * @param sslEnabled {@code true} to record against the SSL histogram, {@code false} for plain text
   * @param datacenter the remote datacenter name; only read when {@code remoteColo} is {@code true}
   * @throws NullPointerException if {@code remoteColo} is {@code true} and {@code datacenter} was not registered
   *                              through {@link #populatePerColoMetrics(Set)}
   */
  public void updateFixMissingStoreKeysTime(long fixMissingStoreKeysTime, boolean remoteColo, boolean sslEnabled,
      String datacenter) {
    if (remoteColo) {
      interColoFixMissingKeysTime.get(datacenter).update(fixMissingStoreKeysTime);
      Map<String, Histogram> perTransport =
          sslEnabled ? sslInterColoFixMissingKeysTime : plainTextInterColoFixMissingKeysTime;
      perTransport.get(datacenter).update(fixMissingStoreKeysTime);
    } else {
      intraColoFixMissingKeysTime.update(fixMissingStoreKeysTime);
      Histogram perTransport = sslEnabled ? sslIntraColoFixMissingKeysTime : plainTextIntraColoFixMissingKeysTime;
      perTransport.update(fixMissingStoreKeysTime);
    }
  }

  /**
   * Records the time taken by a replication metadata request in the overall and the SSL or plain text specific
   * histogram.
   *
   * @param metadataRequestTime the value to record
   * @param remoteColo {@code true} to record against the inter-colo histograms of {@code datacenter},
   *                   {@code false} to record against the intra-colo histograms
   * @param sslEnabled {@code true} to record against the SSL histogram, {@code false} for plain text
   * @param datacenter the remote datacenter name; only read when {@code remoteColo} is {@code true}
   * @throws NullPointerException if {@code remoteColo} is {@code true} and {@code datacenter} was not registered
   *                              through {@link #populatePerColoMetrics(Set)}
   */
  public void updateMetadataRequestTime(long metadataRequestTime, boolean remoteColo, boolean sslEnabled,
      String datacenter) {
    if (remoteColo) {
      interColoReplicationMetadataRequestTime.get(datacenter).update(metadataRequestTime);
      Map<String, Histogram> perTransport = sslEnabled ? sslInterColoReplicationMetadataRequestTime
          : plainTextInterColoReplicationMetadataRequestTime;
      perTransport.get(datacenter).update(metadataRequestTime);
    } else {
      intraColoReplicationMetadataRequestTime.update(metadataRequestTime);
      Histogram perTransport = sslEnabled ? sslIntraColoReplicationMetadataRequestTime
          : plainTextIntraColoReplicationMetadataRequestTime;
      perTransport.update(metadataRequestTime);
    }
  }

  /**
   * Records the time taken by a get request in the overall and the SSL or plain text specific histogram.
   *
   * @param getRequestTime the value to record
   * @param remoteColo {@code true} to record against the inter-colo histograms of {@code datacenter},
   *                   {@code false} to record against the intra-colo histograms
   * @param sslEnabled {@code true} to record against the SSL histogram, {@code false} for plain text
   * @param datacenter the remote datacenter name; only read when {@code remoteColo} is {@code true}
   * @throws NullPointerException if {@code remoteColo} is {@code true} and {@code datacenter} was not registered
   *                              through {@link #populatePerColoMetrics(Set)}
   */
  public void updateGetRequestTime(long getRequestTime, boolean remoteColo, boolean sslEnabled, String datacenter) {
    if (remoteColo) {
      interColoGetRequestTime.get(datacenter).update(getRequestTime);
      Map<String, Histogram> perTransport = sslEnabled ? sslInterColoGetRequestTime : plainTextInterColoGetRequestTime;
      perTransport.get(datacenter).update(getRequestTime);
    } else {
      intraColoGetRequestTime.update(getRequestTime);
      Histogram perTransport = sslEnabled ? sslIntraColoGetRequestTime : plainTextIntraColoGetRequestTime;
      perTransport.update(getRequestTime);
    }
  }

  /**
   * Records the outcome of a batch store write: marks the replicated bytes rate, adds to the replicated blob
   * count and records the write time, in the overall and the SSL or plain text specific metrics.
   *
   * @param batchStoreWriteTime the write time to record
   * @param totalBytesFixed the number of bytes to mark on the bytes rate meters
   * @param totalBlobsFixed the number of blobs to add to the blob counters
   * @param remoteColo {@code true} to record against the inter-colo metrics of {@code datacenter},
   *                   {@code false} to record against the intra-colo metrics
   * @param sslEnabled {@code true} to record against the SSL metrics, {@code false} for plain text
   * @param datacenter the remote datacenter name; only read when {@code remoteColo} is {@code true}
   * @throws NullPointerException if {@code remoteColo} is {@code true} and {@code datacenter} was not registered
   *                              through {@link #populatePerColoMetrics(Set)}
   */
  public void updateBatchStoreWriteTime(long batchStoreWriteTime, long totalBytesFixed, long totalBlobsFixed,
      boolean remoteColo, boolean sslEnabled, String datacenter) {
    if (remoteColo) {
      interColoReplicationBytesRate.get(datacenter).mark(totalBytesFixed);
      interColoBlobsReplicatedCount.get(datacenter).inc(totalBlobsFixed);
      interColoBatchStoreWriteTime.get(datacenter).update(batchStoreWriteTime);
      Map<String, Meter> perTransportBytesRate =
          sslEnabled ? sslInterColoReplicationBytesRate : plainTextInterColoReplicationBytesRate;
      Map<String, Counter> perTransportBlobCount =
          sslEnabled ? sslInterColoBlobsReplicatedCount : plainTextInterColoBlobsReplicatedCount;
      Map<String, Histogram> perTransportWriteTime =
          sslEnabled ? sslInterColoBatchStoreWriteTime : plainTextInterColoBatchStoreWriteTime;
      perTransportBytesRate.get(datacenter).mark(totalBytesFixed);
      perTransportBlobCount.get(datacenter).inc(totalBlobsFixed);
      perTransportWriteTime.get(datacenter).update(batchStoreWriteTime);
    } else {
      intraColoReplicationBytesRate.mark(totalBytesFixed);
      intraColoBlobsReplicatedCount.inc(totalBlobsFixed);
      intraColoBatchStoreWriteTime.update(batchStoreWriteTime);
      Meter perTransportBytesRate =
          sslEnabled ? sslIntraColoReplicationBytesRate : plainTextIntraColoReplicationBytesRate;
      Counter perTransportBlobCount =
          sslEnabled ? sslIntraColoBlobsReplicatedCount : plainTextIntraColoBlobsReplicatedCount;
      Histogram perTransportWriteTime =
          sslEnabled ? sslIntraColoBatchStoreWriteTime : plainTextIntraColoBatchStoreWriteTime;
      perTransportBytesRate.mark(totalBytesFixed);
      perTransportBlobCount.inc(totalBlobsFixed);
      perTransportWriteTime.update(batchStoreWriteTime);
    }
  }

  /**
   * Builds the {@code <host>-<port>-<partition>-} prefix shared by all per remote replica metric names, so the
   * code creating a metric and the code looking it up cannot drift apart.
   */
  private static String remoteReplicaMetricPrefix(ReplicaId replicaId) {
    DataNodeId dataNodeId = replicaId.getDataNodeId();
    return dataNodeId.getHostname() + "-" + dataNodeId.getPort() + "-" + replicaId.getPartitionId() + "-";
  }

  /**
   * Increments the counter stored under {@code metricName}, or does nothing when there is none.
   */
  private static void incrementIfPresent(Map<String, Counter> counters, String metricName) {
    Counter counter = counters.get(metricName);
    if (counter != null) {
      counter.inc();
    }
  }

  /** Returns the {@link Meter} named {@code name} under {@link ReplicaThread} from the registry. */
  private Meter replicaThreadMeter(String name) {
    return registry.meter(MetricRegistry.name(ReplicaThread.class, name));
  }

  /** Returns the {@link Counter} named {@code name} under {@link ReplicaThread} from the registry. */
  private Counter replicaThreadCounter(String name) {
    return registry.counter(MetricRegistry.name(ReplicaThread.class, name));
  }

  /** Returns the {@link Timer} named {@code name} under {@link ReplicaThread} from the registry. */
  private Timer replicaThreadTimer(String name) {
    return registry.timer(MetricRegistry.name(ReplicaThread.class, name));
  }

  /** Returns the {@link Histogram} named {@code name} under {@link ReplicaThread} from the registry. */
  private Histogram replicaThreadHistogram(String name) {
    return registry.histogram(MetricRegistry.name(ReplicaThread.class, name));
  }
}