/*
* Copyright 2013-2014 eXascale Infolab, University of Fribourg. All rights reserved.
*/
package org.apache.hadoop.hadaps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.net.NetworkTopology;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.*;
/**
 * Resolves the configured file patterns against an HDFS filesystem and runs a
 * {@link BalancerTask} for every matching file, with at most
 * {@link #CONCURRENT_TASKS} tasks in flight at any time.
 *
 * <p>The internal thread pool is shut down at the end of {@link #run()}, so an
 * instance is intended to be run once.
 */
class Balancer {

  private static final Logger LOG = LoggerFactory.getLogger(Balancer.class);

  /** Maximum number of balancer tasks that are submitted but not yet completed. */
  private static final int CONCURRENT_TASKS = 3;

  /** Upper bound, in seconds, for the worker threads to exit after the last task finished. */
  private static final long SHUTDOWN_TIMEOUT_SECONDS = 10;

  private final List<ParameterFile> parameterFiles;
  private final Configuration configuration;

  private final ThreadPoolExecutor threadPool = new ThreadPoolExecutor(
      CONCURRENT_TASKS, CONCURRENT_TASKS, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>());
  private final CompletionService<Integer> completionService =
      new ExecutorCompletionService<Integer>(threadPool);

  /**
   * Creates a balancer for the given patterns.
   *
   * @param parameterFiles the file patterns to balance; the list is stored by reference
   * @param configuration  the Hadoop configuration used to obtain the filesystem
   * @throws IllegalArgumentException if either argument is {@code null}
   */
  Balancer(List<ParameterFile> parameterFiles, Configuration configuration) {
    if (parameterFiles == null) {
      throw new IllegalArgumentException("parameterFiles must not be null");
    }
    if (configuration == null) {
      throw new IllegalArgumentException("configuration must not be null");
    }

    this.parameterFiles = parameterFiles;
    this.configuration = configuration;
  }

  /**
   * Balances every file matching the configured patterns and blocks until all
   * submitted tasks have completed.
   *
   * <p>A failing task is logged and does not prevent the remaining files from
   * being processed.
   *
   * @throws IOException           if the filesystem cannot be obtained or listing files fails
   * @throws InterruptedException  if the calling thread is interrupted while waiting for tasks;
   *                               tasks still running are then cancelled via {@code shutdownNow()}
   * @throws IllegalStateException if the configured filesystem is not a {@link DistributedFileSystem}
   */
  void run() throws IOException, InterruptedException {
    // Get the distributed filesystem
    FileSystem fs = FileSystem.get(configuration);
    if (!(fs instanceof DistributedFileSystem)) {
      throw new IllegalStateException("Filesystem " + fs.getUri() + " is not an HDFS filesystem");
    }
    DistributedFileSystem fileSystem = (DistributedFileSystem) fs;

    // Create BalancerNameNode
    BalancerNameNode nameNode = new BalancerNameNode(fileSystem);

    // Create our policy
    HadapsBlockPlacementPolicy policy = new HadapsBlockPlacementPolicy(configuration, null,
        NetworkTopology.getInstance(configuration));

    // Populate balancer files
    List<BalancerFile> files = getBalancerFiles(fileSystem);

    try {
      // Track submitted-but-not-collected tasks explicitly. ThreadPoolExecutor.getActiveCount()
      // is only approximate and does not include tasks still waiting in the queue, so it cannot
      // be used to decide when all work is done.
      int inFlight = 0;

      // Now balance each file
      for (BalancerFile file : files) {
        if (inFlight >= CONCURRENT_TASKS) {
          awaitCompletedTask();
          inFlight--;
        }

        completionService.submit(new BalancerTask(file, policy, nameNode));
        inFlight++;
      }

      // Await completion of every remaining task
      while (inFlight > 0) {
        awaitCompletedTask();
        inFlight--;
      }

      // Initiate a proper shutdown
      threadPool.shutdown();
      if (!threadPool.awaitTermination(SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
        LOG.warn("Balancer thread pool did not terminate within {} seconds", SHUTDOWN_TIMEOUT_SECONDS);
      }
    } finally {
      // On an abnormal exit (exception or interruption) make sure no worker threads are left
      // behind; the pool's core threads are non-daemon and would otherwise keep the JVM alive.
      if (!threadPool.isTerminated()) {
        threadPool.shutdownNow();
      }
    }
  }

  /**
   * Blocks until one submitted task has completed and logs its failure, if any.
   *
   * @throws InterruptedException if interrupted while waiting
   */
  private void awaitCompletedTask() throws InterruptedException {
    Future<Integer> finished = completionService.take();
    try {
      finished.get();
    } catch (ExecutionException e) {
      LOG.error("Balancer task failed", e);
    }
  }

  /**
   * Expands every configured pattern via {@code globStatus} and collects the matching
   * files, recursing into matching directories.
   *
   * @param fileSystem the filesystem to resolve the patterns against
   * @return the matching files, sorted by their natural ordering
   * @throws IOException if globbing or listing a directory fails
   */
  private List<BalancerFile> getBalancerFiles(DistributedFileSystem fileSystem) throws IOException {
    List<BalancerFile> balancerFiles = new ArrayList<BalancerFile>();

    // Iterate over each pattern
    for (ParameterFile parameterFile : parameterFiles) {
      Path globPath = new Path(parameterFile.getName());

      FileStatus[] stats = fileSystem.globStatus(globPath);
      if (stats != null && stats.length > 0) {
        // We have some matching paths
        List<BalancerFile> matchingFiles = new ArrayList<BalancerFile>();
        for (FileStatus stat : stats) {
          populateBalancerFiles(matchingFiles, stat, parameterFile, fileSystem);
        }

        balancerFiles.addAll(matchingFiles);
        LOG.info("Matching files for pattern \"{}\": {}", globPath.toString(), matchingFiles);
      } else {
        LOG.info("No matching files for pattern \"{}\"", globPath.toString());
      }
    }

    Collections.sort(balancerFiles);

    return balancerFiles;
  }

  /**
   * Adds {@code status} to {@code balancerFiles} if it is a file, or recursively adds the
   * contents of the directory it denotes. Entries that are neither are ignored.
   *
   * @param balancerFiles the list to append to
   * @param status        the file or directory to process
   * @param parameterFile the pattern that matched, passed on to each created {@link BalancerFile}
   * @param fileSystem    the filesystem used to list directories
   * @throws IOException if listing a directory fails
   */
  private void populateBalancerFiles(List<BalancerFile> balancerFiles, FileStatus status,
      ParameterFile parameterFile, DistributedFileSystem fileSystem) throws IOException {
    assert balancerFiles != null;
    assert status != null;
    assert parameterFile != null;
    assert fileSystem != null;

    if (status.isFile()) {
      balancerFiles.add(new BalancerFile(status, parameterFile, fileSystem));
    } else if (status.isDirectory()) {
      // Recurse into directory
      FileStatus[] stats = fileSystem.listStatus(status.getPath());
      for (FileStatus stat : stats) {
        populateBalancerFiles(balancerFiles, stat, parameterFile, fileSystem);
      }
    }
  }
}