/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.raid.Statistics.Counters;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.raid.protocol.PolicyList;
import org.apache.hadoop.util.StringUtils;
/**
 * Background task that periodically gathers RAID statistics.
 *
 * <p>Each round walks the source paths of every configured policy and the
 * RS and XOR parity locations, accumulates one {@link Statistics} object per
 * {@link ErasureCodeType}, publishes the result through
 * {@link RaidNodeMetrics} and keeps it available to the getters of this
 * class. A new result map is built for every round and swapped in through a
 * volatile field once the round has finished.
 */
public class StatisticsCollector implements Runnable {

  public static final Log LOG = LogFactory.getLog(StatisticsCollector.class);

  /** Minimum time between the starts of two rounds: 20 minutes, in ms. */
  public static final long UPDATE_PERIOD = 20 * 60 * 1000L;

  /** A progress line is logged every time this many files were scanned. */
  public static final long FILES_SCANNED_LOGGING_PERIOD = 100 * 1000L;

  private final ConfigManager configManager;
  private final Path rsParityLocation;
  private final Path xorParityLocation;
  private final FileSystem fs;
  private final Configuration conf;
  private final int numThreads;

  /** Result of the last completed round; null until one has completed. */
  private volatile Map<ErasureCodeType, Statistics> lastRaidStatistics;
  private volatile long lastUpdateFinishTime = 0L;
  private volatile long lastUpdateUsedTime = 0L;
  private long lastUpdateStartTime = 0L;
  private volatile boolean running = true;
  private volatile long filesScanned = 0;

  /**
   * Creates a collector. No statistics are available until the first call to
   * {@link #collect(Collection)} has completed.
   *
   * @param configManager source of the RAID policies to scan
   * @param conf configuration used to resolve the file system, the parity
   *        locations and the number of directory traversal threads
   * @throws IOException if the file system cannot be obtained
   */
  public StatisticsCollector(ConfigManager configManager, Configuration conf)
      throws IOException {
    this.configManager = configManager;
    this.conf = conf;
    this.fs = new Path(Path.SEPARATOR).getFileSystem(conf);
    this.rsParityLocation = RaidNode.rsDestinationPath(conf);
    this.xorParityLocation = RaidNode.xorDestinationPath(conf);
    this.numThreads = conf.getInt(RaidNode.RAID_DIRECTORYTRAVERSAL_THREADS, 4);
  }

  /**
   * Collects statistics until {@link #stop()} is called or the thread is
   * interrupted. A round starts once at least {@link #UPDATE_PERIOD} ms have
   * passed since the previous round started. A round that fails with an
   * {@link IOException} is logged and the loop carries on.
   */
  @Override
  public void run() {
    while (running) {
      try {
        // Poll in short slices and re-check the flag so stop() is noticed
        // without waiting for the whole period to elapse.
        while (running
            && RaidNode.now() - lastUpdateStartTime < UPDATE_PERIOD) {
          Thread.sleep(UPDATE_PERIOD / 10);
        }
        if (!running) {
          // Stopped while waiting: do not start another full scan.
          break;
        }
        Collection<PolicyInfo> allPolicies = loadPolicies();
        collect(allPolicies);
      } catch (IOException e) {
        // Keep the cause in the log; the message alone says nothing about
        // what went wrong.
        LOG.warn("Failed to collect statistics. Retry", e);
      } catch (InterruptedException e) {
        LOG.info(StatisticsCollector.class + " interrupted.");
        // Treat the interrupt as a shutdown request. The flag is restored for
        // the code that owns the thread, and the loop must end here because a
        // set flag would make every following sleep fail immediately.
        Thread.currentThread().interrupt();
        return;
      }
    }
  }

  /** Asks {@link #run()} to terminate; takes effect at its next check. */
  public void stop() {
    running = false;
  }

  /**
   * Returns the statistics of the last completed round for one code.
   *
   * @param code erasure code of interest
   * @return the statistics, or null if no round has completed yet
   */
  public Statistics getRaidStatistics(ErasureCodeType code) {
    Map<ErasureCodeType, Statistics> snapshot = lastRaidStatistics;
    if (snapshot == null) {
      return null;
    }
    return snapshot.get(code);
  }

  /**
   * Returns how long the last completed round took. The misspelled method
   * name is kept so that existing callers continue to compile.
   *
   * @return finish time minus start time of the last completed round in ms,
   *         or 0 if no round has completed yet
   */
  public long getUpateUsedTime() {
    return lastUpdateUsedTime;
  }

  /**
   * @return the time at which the last round finished, or 0 if no round has
   *         completed yet
   */
  public long getLastUpdateTime() {
    return lastUpdateFinishTime;
  }

  /**
   * Get the total RAID saving in bytes.
   *
   * @return Number of bytes saved due to RAID, summed over all erasure
   *         codes, or -1 if no round has completed yet or any code reports -1
   */
  public long getSaving() {
    // One read of the volatile field so that every code is taken from the
    // same round.
    Map<ErasureCodeType, Statistics> snapshot = lastRaidStatistics;
    if (snapshot == null) {
      return -1;
    }
    long saving = 0;
    for (ErasureCodeType code : ErasureCodeType.values()) {
      long s = snapshot.get(code).getSaving();
      if (s == -1) {
        return -1;
      }
      saving += s;
    }
    return saving;
  }

  /**
   * Get the estimated total RAID saving when policies are done.
   *
   * @return Number of bytes saved due to RAID, summed over all erasure
   *         codes, or -1 if no round has completed yet or any code reports -1
   */
  public long getDoneSaving() {
    Map<ErasureCodeType, Statistics> snapshot = lastRaidStatistics;
    if (snapshot == null) {
      return -1;
    }
    long saving = 0;
    for (ErasureCodeType code : ErasureCodeType.values()) {
      long s = snapshot.get(code).getDoneSaving();
      if (s == -1) {
        return -1;
      }
      saving += s;
    }
    return saving;
  }

  /**
   * Computes the ratio of physical bytes used in DFS to logical bytes stored.
   *
   * <p>The logical size is the logical size of all raided source files plus
   * the remaining physical bytes (DFS used minus raided source bytes minus
   * parity bytes) divided by the default replication.
   *
   * @return the ratio, or -1 if no round has completed yet, the DFS usage
   *         cannot be queried, or the logical size is 0
   */
  public double getEffectiveReplication() {
    Map<ErasureCodeType, Statistics> snapshot = lastRaidStatistics;
    if (snapshot == null) {
      return -1;
    }
    double totalPhysical;
    double defaultReplication;
    DFSClient dfs = null;
    try {
      dfs = new DFSClient(conf);
      totalPhysical = dfs.getDiskStatus().getDfsUsed();
      defaultReplication = dfs.getDefaultReplication();
    } catch (IOException e) {
      LOG.warn("Failed to query DFS usage for effective replication.", e);
      return -1;
    } finally {
      // This method runs once per round; without the close every call would
      // leave a client behind.
      if (dfs != null) {
        try {
          dfs.close();
        } catch (IOException e) {
          LOG.warn("Failed to close DFSClient.", e);
        }
      }
    }
    double notRaidedPhysical = totalPhysical;
    double totalLogical = 0;
    for (ErasureCodeType code : ErasureCodeType.values()) {
      Statistics st = snapshot.get(code);
      Counters raided = st.getSourceCounters(RaidState.RAIDED);
      totalLogical += raided.getNumLogical();
      notRaidedPhysical -= raided.getNumBytes();
      notRaidedPhysical -= st.getParityCounters().getNumBytes();
    }
    totalLogical += notRaidedPhysical / defaultReplication;
    if (totalLogical == 0) {
      // divided by 0
      return -1;
    }
    return totalPhysical / totalLogical;
  }

  /**
   * Reloads the configuration if necessary and flattens all policy lists
   * into one collection.
   */
  private Collection<PolicyInfo> loadPolicies() {
    configManager.reloadConfigsIfNecessary();
    Collection<PolicyInfo> allPolicyInfos = new ArrayList<PolicyInfo>();
    for (PolicyList policyList : configManager.getAllPolicies()) {
      allPolicyInfos.addAll(policyList.getAll());
    }
    return allPolicyInfos;
  }

  /**
   * Runs one full collection round: source files of every policy, then the
   * RS and XOR parity locations. On success the result replaces the previous
   * one, the metrics are updated and the timing fields are refreshed. On
   * failure the previous result stays in place.
   *
   * @param allPolicies policies whose source paths are scanned
   * @throws IOException if a traversal fails or a policy is misconfigured
   */
  void collect(Collection<PolicyInfo> allPolicies) throws IOException {
    Map<ErasureCodeType, Statistics> codeToRaidStatistics =
        createEmptyStatistics();
    // Set before any work so that a failed round also waits a full period
    // before the next attempt.
    lastUpdateStartTime = RaidNode.now();
    filesScanned = 0;
    Statistics rsStats = codeToRaidStatistics.get(ErasureCodeType.RS);
    Statistics xorStats = codeToRaidStatistics.get(ErasureCodeType.XOR);
    collectSourceStatistics(codeToRaidStatistics, allPolicies);
    collectParityStatistics(rsParityLocation, rsStats);
    collectParityStatistics(xorParityLocation, xorStats);
    // Publish first: populateMetrics reads the published map through
    // getEffectiveReplication() and getSaving().
    lastRaidStatistics = codeToRaidStatistics;
    populateMetrics(codeToRaidStatistics);
    long now = RaidNode.now();
    lastUpdateFinishTime = now;
    lastUpdateUsedTime = lastUpdateFinishTime - lastUpdateStartTime;
    LOG.info("Finishing collecting statistics.");
  }

  /** Creates a map holding a fresh {@link Statistics} for every code. */
  private Map<ErasureCodeType, Statistics> createEmptyStatistics() {
    Map<ErasureCodeType, Statistics> m =
        new EnumMap<ErasureCodeType, Statistics>(ErasureCodeType.class);
    for (ErasureCodeType code : ErasureCodeType.values()) {
      m.put(code, new Statistics(code, conf));
    }
    return m;
  }

  /**
   * Adds every source file of every policy to the statistics of the policy's
   * erasure code.
   *
   * @throws IOException if a traversal fails or a policy has no valid
   *         targetReplication property
   */
  private void collectSourceStatistics(
      Map<ErasureCodeType, Statistics> codeToRaidStatistics,
      Collection<PolicyInfo> allPolicyInfos)
      throws IOException {
    long now = RaidNode.now();
    RaidState.Checker checker =
        new RaidState.Checker(allPolicyInfos, conf);
    for (PolicyInfo info : allPolicyInfos) {
      LOG.info("Collecting statistics for policy:" + info.getName() + ".");
      ErasureCodeType code = info.getErasureCode();
      Statistics statistics = codeToRaidStatistics.get(code);
      // Validate the policy before the traversal is created.
      int targetReplication = parseTargetReplication(info);
      DirectoryTraversal retriver =
          DirectoryTraversal.fileRetriever(
              info.getSrcPathExpanded(), fs, numThreads, false);
      FileStatus file;
      while ((file = retriver.next()) != DirectoryTraversal.FINISH_TOKEN) {
        statistics.addSourceFile(info, file, checker, now, targetReplication);
        incFileScanned();
      }
      LOG.info("Finish collecting statistics for policy:" + info.getName() +
          "\n" + statistics);
    }
  }

  /**
   * Reads the targetReplication property of a policy.
   *
   * <p>A missing or malformed value is reported as an {@link IOException}
   * so that it goes through the log-and-retry handling in {@link #run()}
   * instead of ending the collector thread with an unchecked exception.
   */
  private static int parseTargetReplication(PolicyInfo info)
      throws IOException {
    String value = info.getProperty("targetReplication");
    try {
      return Integer.parseInt(value);
    } catch (NumberFormatException e) {
      throw new IOException("Policy " + info.getName() +
          " has an invalid targetReplication: " + value, e);
    }
  }

  /** Adds every file found under the parity location to the statistics. */
  private void collectParityStatistics(Path parityLocation,
      Statistics statistics) throws IOException {
    LOG.info("Collecting parity statistics in " + parityLocation + ".");
    DirectoryTraversal retriver =
        DirectoryTraversal.fileRetriever(
            Arrays.asList(parityLocation), fs, numThreads, false);
    FileStatus file;
    while ((file = retriver.next()) != DirectoryTraversal.FINISH_TOKEN) {
      statistics.addParityFile(file);
      incFileScanned();
    }
    LOG.info("Finish collecting statistics in " +
        parityLocation + "\n" + statistics);
  }

  /**
   * @return number of files scanned so far in the current round, or in the
   *         last one if no round is in progress
   */
  public long getFilesScanned() {
    return filesScanned;
  }

  /**
   * Counts one scanned file and logs progress every
   * {@link #FILES_SCANNED_LOGGING_PERIOD} files.
   */
  private void incFileScanned() {
    // NOTE(review): this increment of a volatile field is not atomic; it is
    // only safe as long as a single thread runs collect() - confirm.
    filesScanned += 1;
    if (filesScanned % FILES_SCANNED_LOGGING_PERIOD == 0) {
      LOG.info("Scanned " +
          StringUtils.humanReadableInt(filesScanned) + " files.");
    }
  }

  /**
   * Copies the parity and per-state source counters of every erasure code
   * into {@link RaidNodeMetrics}, followed by the effective replication and
   * the total saving whenever those can be computed.
   */
  private void populateMetrics(
      Map<ErasureCodeType, Statistics> codeToRaidStatistics) {
    RaidNodeMetrics metrics = RaidNodeMetrics.getInstance();
    for (ErasureCodeType code : ErasureCodeType.values()) {
      Statistics statistics = codeToRaidStatistics.get(code);
      Counters counters = statistics.getParityCounters();
      metrics.parityFiles.get(code).set(counters.getNumFiles());
      metrics.parityBlocks.get(code).set(counters.getNumBlocks());
      metrics.parityBytes.get(code).set(counters.getNumBytes());
      metrics.parityLogical.get(code).set(counters.getNumLogical());
      for (RaidState state : RaidState.values()) {
        counters = statistics.getSourceCounters(state);
        metrics.sourceFiles.get(code).get(state).set(counters.getNumFiles());
        metrics.sourceBlocks.get(code).get(state).set(counters.getNumBlocks());
        metrics.sourceBytes.get(code).get(state).set(counters.getNumBytes());
        metrics.sourceLogical.get(code).get(state).set(counters.getNumLogical());
      }
    }
    double repl = getEffectiveReplication();
    long saving = getSaving();
    // -1 means "unknown"; the previously published value is kept then.
    if (repl != -1) {
      metrics.effectiveReplicationTimes1000.set((long) (1000 * repl));
    }
    if (saving != -1) {
      metrics.saving.set(saving);
    }
  }
}