/** * Copyright 2016 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ package com.github.ambry.clustermap; import com.codahale.metrics.Gauge; import com.codahale.metrics.MetricRegistry; import java.util.ArrayList; import java.util.List; /** * Metrics for the {@link StaticClusterManager}. */ class ClusterMapMetrics { private MetricRegistry registry; private final HardwareLayout hardwareLayout; private final PartitionLayout partitionLayout; public final Gauge<Long> hardwareLayoutVersion; public final Gauge<Long> partitionLayoutVersion; public final Gauge<Long> datacenterCount; public final Gauge<Long> dataNodeCount; public final Gauge<Long> diskCount; public final Gauge<Long> dataNodesHardUpCount; public final Gauge<Long> dataNodesHardDownCount; public final Gauge<Long> dataNodesUnavailableCount; public List<Gauge<Long>> dataNodeStateList; public final Gauge<Long> disksHardUpCount; public final Gauge<Long> disksHardDownCount; public final Gauge<Long> disksUnavailableCount; public List<Gauge<Long>> diskStateList; public final Gauge<Long> partitionCount; public final Gauge<Long> partitionsReadWrite; public final Gauge<Long> partitionsReadOnly; public final Gauge<Boolean> isMajorityReplicasDown; public final Gauge<Long> rawCapacityInBytes; public final Gauge<Long> allocatedRawCapacityInBytes; public final Gauge<Long> allocatedUsableCapacityInBytes; /** * Metrics for the {@link StaticClusterManager} * @param hardwareLayout The {@link HardwareLayout} associated with the {@link StaticClusterManager} * @param partitionLayout The {@link PartitionLayout} associated with the {@link StaticClusterManager} * @param registry The {@link MetricRegistry} associated with the {@link StaticClusterManager} */ public ClusterMapMetrics(HardwareLayout hardwareLayout, PartitionLayout partitionLayout, MetricRegistry registry) { this.registry = registry; this.hardwareLayout = hardwareLayout; this.partitionLayout = partitionLayout; // Metrics based on HardwareLayout this.hardwareLayoutVersion = new Gauge<Long>() { @Override public Long getValue() { return getHardwareLayoutVersion(); } }; this.partitionLayoutVersion = new Gauge<Long>() { @Override public Long getValue() { return getPartitionLayoutVersion(); } }; registry.register(MetricRegistry.name(ClusterMap.class, "hardwareLayoutVersion"), hardwareLayoutVersion); registry.register(MetricRegistry.name(ClusterMap.class, "partitionLayoutVersion"), partitionLayoutVersion); this.datacenterCount = new Gauge<Long>() { @Override public Long getValue() { return countDatacenters(); } }; this.dataNodeCount = new Gauge<Long>() { @Override public Long getValue() { return countDataNodes(); } }; this.diskCount = new Gauge<Long>() { @Override public Long getValue() { return countDisks(); } }; registry.register(MetricRegistry.name(ClusterMap.class, "datacenterCount"), datacenterCount); registry.register(MetricRegistry.name(ClusterMap.class, "dataNodeCount"), dataNodeCount); registry.register(MetricRegistry.name(ClusterMap.class, "diskCount"), diskCount); this.dataNodesHardUpCount = new Gauge<Long>() { @Override public Long getValue() { return countDataNodesInHardState(HardwareState.AVAILABLE); } }; this.dataNodesHardDownCount = new Gauge<Long>() { @Override public Long getValue() { return countDataNodesInHardState(HardwareState.UNAVAILABLE); } }; this.dataNodesUnavailableCount = new Gauge<Long>() { @Override public Long getValue() { return countUnavailableDataNodes(); } }; this.disksHardUpCount = new Gauge<Long>() { @Override public Long getValue() { return countDisksInHardState(HardwareState.AVAILABLE); } }; this.disksHardDownCount = new Gauge<Long>() { @Override public Long getValue() { return countDisksInHardState(HardwareState.UNAVAILABLE); } }; this.disksUnavailableCount = new Gauge<Long>() { @Override public Long getValue() { return countUnavailableDisks(); } }; registry.register(MetricRegistry.name(ClusterMap.class, "dataNodesHardUpCount"), dataNodesHardUpCount); registry.register(MetricRegistry.name(ClusterMap.class, "dataNodesHardDownCount"), dataNodesHardDownCount); registry.register(MetricRegistry.name(ClusterMap.class, "dataNodesUnavailableCount"), dataNodesUnavailableCount); registry.register(MetricRegistry.name(ClusterMap.class, "disksHardUpCount"), disksHardUpCount); registry.register(MetricRegistry.name(ClusterMap.class, "disksHardDownCount"), disksHardDownCount); registry.register(MetricRegistry.name(ClusterMap.class, "disksUnavailableCount"), disksUnavailableCount); // Metrics based on PartitionLayout this.partitionCount = new Gauge<Long>() { @Override public Long getValue() { return countPartitions(); } }; this.partitionsReadWrite = new Gauge<Long>() { @Override public Long getValue() { return countPartitionsInState(PartitionState.READ_WRITE); } }; this.partitionsReadOnly = new Gauge<Long>() { @Override public Long getValue() { return countPartitionsInState(PartitionState.READ_ONLY); } }; registry.register(MetricRegistry.name(ClusterMap.class, "numberOfPartitions"), partitionCount); registry.register(MetricRegistry.name(ClusterMap.class, "numberOfReadWritePartitions"), partitionsReadWrite); registry.register(MetricRegistry.name(ClusterMap.class, "numberOfReadOnlyPartitions"), partitionsReadOnly); this.isMajorityReplicasDown = new Gauge<Boolean>() { @Override public Boolean getValue() { return isMajorityOfReplicasDown(); } }; registry.register(MetricRegistry.name(ClusterMap.class, "isMajorityReplicasDown"), isMajorityReplicasDown); this.rawCapacityInBytes = new Gauge<Long>() { @Override public Long getValue() { return getRawCapacity(); } }; this.allocatedRawCapacityInBytes = new Gauge<Long>() { @Override public Long getValue() { return getAllocatedRawCapacity(); } }; this.allocatedUsableCapacityInBytes = new Gauge<Long>() { @Override public Long getValue() { return getAllocatedUsableCapacity(); } }; registry.register(MetricRegistry.name(ClusterMap.class, "rawCapacityInBytes"), rawCapacityInBytes); registry.register(MetricRegistry.name(ClusterMap.class, "allocatedRawCapacityInBytes"), allocatedRawCapacityInBytes); registry.register(MetricRegistry.name(ClusterMap.class, "allocatedUsableCapacityInBytes"), allocatedUsableCapacityInBytes); dataNodeStateList = new ArrayList<Gauge<Long>>(); diskStateList = new ArrayList<Gauge<Long>>(); for (Datacenter datacenter : hardwareLayout.getDatacenters()) { for (DataNode dataNode : datacenter.getDataNodes()) { addDataNodeToStateMetrics(dataNode); for (Disk disk : dataNode.getDisks()) { addDiskToStateMetrics(disk); } } } } private void addDataNodeToStateMetrics(final DataNode dataNode) { final String metricName = dataNode.getHostname() + "-" + dataNode.getPort() + "-ResourceState"; Gauge<Long> dataNodeState = new Gauge<Long>() { @Override public Long getValue() { return dataNode.getState() == HardwareState.AVAILABLE ? 1L : 0L; } }; registry.register(MetricRegistry.name(ClusterMap.class, metricName), dataNodeState); dataNodeStateList.add(dataNodeState); } private void addDiskToStateMetrics(final Disk disk) { final String metricName = disk.getDataNode().getHostname() + "-" + disk.getDataNode().getPort() + "-" + disk.getMountPath() + "-ResourceState"; Gauge<Long> diskState = new Gauge<Long>() { @Override public Long getValue() { return disk.getState() == HardwareState.AVAILABLE ? 1L : 0L; } }; registry.register(MetricRegistry.name(ClusterMap.class, metricName), diskState); dataNodeStateList.add(diskState); } private boolean isMajorityOfReplicasDown() { boolean isMajorityReplicasDown = false; for (PartitionId partition : partitionLayout.getPartitions()) { List<? extends ReplicaId> replicas = partition.getReplicaIds(); int replicaCount = replicas.size(); int downReplicas = 0; for (ReplicaId replicaId : replicas) { if (replicaId.isDown()) { downReplicas++; } } if (downReplicas > replicaCount / 2) { isMajorityReplicasDown = true; break; } } return isMajorityReplicasDown; } private long getHardwareLayoutVersion() { return hardwareLayout.getVersion(); } private long getPartitionLayoutVersion() { return partitionLayout.getVersion(); } private long countDatacenters() { return hardwareLayout.getDatacenterCount(); } private long countDataNodes() { return hardwareLayout.getDataNodeCount(); } private long countDisks() { return hardwareLayout.getDiskCount(); } private long countDataNodesInHardState(HardwareState hardwareState) { return hardwareLayout.getDataNodeInHardStateCount(hardwareState); } private long countUnavailableDataNodes() { return hardwareLayout.calculateUnavailableDataNodeCount(); } private long countDisksInHardState(HardwareState hardwareState) { return hardwareLayout.getDiskInHardStateCount(hardwareState); } private long countUnavailableDisks() { return hardwareLayout.calculateUnavailableDiskCount(); } private long countPartitions() { return partitionLayout.getPartitionCount(); } private long countPartitionsInState(PartitionState partitionState) { return partitionLayout.getPartitionInStateCount(partitionState); } private long getRawCapacity() { return hardwareLayout.getRawCapacityInBytes(); } private long getAllocatedRawCapacity() { return partitionLayout.getAllocatedRawCapacityInBytes(); } private long getAllocatedUsableCapacity() { return partitionLayout.getAllocatedUsableCapacityInBytes(); } }