/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode.metrics;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.DataNodeVolumeMetrics;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import org.apache.hadoop.util.Daemon;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
/**
* This class detects and maintains DataNode disk outliers and their
* latencies for different ops (metadata, read, write).
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class DataNodeDiskMetrics {

  public static final Logger LOG = LoggerFactory.getLogger(
      DataNodeDiskMetrics.class);

  private DataNode dn;
  // Minimum number of disks required for outlier detection to be meaningful.
  private final long MIN_OUTLIER_DETECTION_DISKS = 5;
  // Threshold (ms) below which a disk's mean latency is never an outlier.
  private final long SLOW_DISK_LOW_THRESHOLD_MS = 20;
  // Interval (ms) between two consecutive outlier-detection passes.
  private final long detectionInterval;
  // Flag checked by the detection daemon; cleared by shutdownAndWait().
  private volatile boolean shouldRun;
  private OutlierDetector slowDiskDetector;
  private Daemon slowDiskDetectionDaemon;
  // Latest snapshot of detected outliers, keyed by volume base path.
  // Replaced wholesale (volatile) so readers never see a partial update.
  private volatile Map<String, Map<DiskOutlierDetectionOp, Double>>
      diskOutliersStats = Maps.newHashMap();

  /**
   * Creates the disk metrics tracker and immediately starts the background
   * outlier-detection daemon.
   *
   * @param dn the DataNode whose volumes are monitored
   * @param diskOutlierDetectionIntervalMs interval in milliseconds between
   *        detection passes
   */
  public DataNodeDiskMetrics(DataNode dn, long diskOutlierDetectionIntervalMs) {
    this.dn = dn;
    this.detectionInterval = diskOutlierDetectionIntervalMs;
    slowDiskDetector = new OutlierDetector(MIN_OUTLIER_DETECTION_DISKS,
        SLOW_DISK_LOW_THRESHOLD_MS);
    shouldRun = true;
    startDiskOutlierDetectionThread();
  }

  /**
   * Starts the daemon that periodically samples per-volume mean latencies
   * (metadata, read, write ops) and refreshes the outlier snapshot.
   */
  private void startDiskOutlierDetectionThread() {
    slowDiskDetectionDaemon = new Daemon(new Runnable() {
      @Override
      public void run() {
        while (shouldRun) {
          Map<String, Double> metadataOpStats = Maps.newHashMap();
          Map<String, Double> readIoStats = Maps.newHashMap();
          Map<String, Double> writeIoStats = Maps.newHashMap();
          FsDatasetSpi.FsVolumeReferences fsVolumeReferences = null;
          try {
            fsVolumeReferences = dn.getFSDataset().getFsVolumeReferences();
            Iterator<FsVolumeSpi> volumeIterator = fsVolumeReferences
                .iterator();
            while (volumeIterator.hasNext()) {
              FsVolumeSpi volume = volumeIterator.next();
              // BUGFIX: previously called volumeIterator.next() a second
              // time here, which skipped every other volume and could throw
              // NoSuchElementException with an odd number of volumes.
              DataNodeVolumeMetrics metrics = volume.getMetrics();
              String volumeName = volume.getBasePath();

              metadataOpStats.put(volumeName,
                  metrics.getMetadataOperationMean());
              readIoStats.put(volumeName, metrics.getReadIoMean());
              writeIoStats.put(volumeName, metrics.getWriteIoMean());
            }
          } finally {
            if (fsVolumeReferences != null) {
              try {
                fsVolumeReferences.close();
              } catch (IOException e) {
                LOG.error("Error in releasing FS Volume references", e);
              }
            }
          }

          if (metadataOpStats.isEmpty() && readIoStats.isEmpty() &&
              writeIoStats.isEmpty()) {
            // BUGFIX: previously this returned, permanently terminating the
            // detection thread the first time no stats were available (e.g.
            // during startup). Instead, skip this pass and retry after the
            // detection interval.
            LOG.debug("No disk stats available for detecting outliers.");
          } else {
            detectAndUpdateDiskOutliers(metadataOpStats, readIoStats,
                writeIoStats);
          }

          try {
            Thread.sleep(detectionInterval);
          } catch (InterruptedException e) {
            LOG.error("Disk Outlier Detection thread interrupted", e);
            Thread.currentThread().interrupt();
          }
        }
      }
    });
    slowDiskDetectionDaemon.start();
  }

  /**
   * Runs outlier detection over the three per-op latency maps and replaces
   * the published {@link #diskOutliersStats} snapshot with the union of all
   * disks flagged for any op. For each flagged disk, the snapshot records
   * its mean latency for every op (possibly null if the disk reported no
   * stats for that op).
   */
  private void detectAndUpdateDiskOutliers(Map<String, Double> metadataOpStats,
      Map<String, Double> readIoStats, Map<String, Double> writeIoStats) {
    Set<String> diskOutliersSet = Sets.newHashSet();

    // Get MetadataOp Outliers
    Map<String, Double> metadataOpOutliers = slowDiskDetector
        .getOutliers(metadataOpStats);
    if (!metadataOpOutliers.isEmpty()) {
      diskOutliersSet.addAll(metadataOpOutliers.keySet());
    }

    // Get ReadIo Outliers
    Map<String, Double> readIoOutliers = slowDiskDetector
        .getOutliers(readIoStats);
    if (!readIoOutliers.isEmpty()) {
      diskOutliersSet.addAll(readIoOutliers.keySet());
    }

    // Get WriteIo Outliers
    Map<String, Double> writeIoOutliers = slowDiskDetector
        .getOutliers(writeIoStats);
    // BUGFIX: previously tested readIoOutliers.isEmpty() here (copy-paste),
    // so write outliers were guarded by the wrong condition.
    if (!writeIoOutliers.isEmpty()) {
      diskOutliersSet.addAll(writeIoOutliers.keySet());
    }

    Map<String, Map<DiskOutlierDetectionOp, Double>> diskStats =
        Maps.newHashMap();
    for (String disk : diskOutliersSet) {
      Map<DiskOutlierDetectionOp, Double> diskStat = Maps.newHashMap();
      diskStat.put(DiskOutlierDetectionOp.METADATA, metadataOpStats.get(disk));
      diskStat.put(DiskOutlierDetectionOp.READ, readIoStats.get(disk));
      diskStat.put(DiskOutlierDetectionOp.WRITE, writeIoStats.get(disk));
      diskStats.put(disk, diskStat);
    }

    // Publish atomically via the volatile reference.
    diskOutliersStats = diskStats;
    LOG.debug("Updated disk outliers.");
  }

  /**
   * Lists the types of operations on which disk latencies are measured.
   */
  public enum DiskOutlierDetectionOp {
    METADATA,
    READ,
    WRITE
  }

  /**
   * @return the most recently published outlier snapshot, mapping volume
   *         base path to per-op mean latencies.
   */
  public Map<String,
      Map<DiskOutlierDetectionOp, Double>> getDiskOutliersStats() {
    return diskOutliersStats;
  }

  /**
   * Stops the detection daemon and blocks until it exits.
   */
  public void shutdownAndWait() {
    shouldRun = false;
    slowDiskDetectionDaemon.interrupt();
    try {
      slowDiskDetectionDaemon.join();
    } catch (InterruptedException e) {
      LOG.error("Disk Outlier Detection daemon did not shutdown", e);
    }
  }

  /**
   * Use only for testing.
   */
  @VisibleForTesting
  public void addSlowDiskForTesting(String slowDiskPath) {
    diskOutliersStats.put(slowDiskPath,
        ImmutableMap.<DiskOutlierDetectionOp, Double>of());
  }
}