/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.underfs.hdfs;
import alluxio.AlluxioURI;
import alluxio.underfs.UnderFileSystemCluster;
import alluxio.util.io.FileUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
* A local MiniDFSCluster for testing {@code HdfsUnderFileSystem}. This class emulates an HDFS
* cluster on the local machine, so {@code HdfsUnderFilesystem} can talk to this emulated HDFS
* cluster.
*/
public class LocalMiniDFSCluster extends UnderFileSystemCluster {
  private static final Logger LOG = LoggerFactory.getLogger(LocalMiniDFSCluster.class);

  private org.apache.hadoop.conf.Configuration mConf = new org.apache.hadoop.conf.Configuration();
  private int mNamenodePort;
  private int mNumDataNode;
  private MiniDFSCluster mDfsCluster = null;
  private DistributedFileSystem mDfsClient = null;
  private boolean mIsStarted = false;

  /**
   * Initializes a {@link LocalMiniDFSCluster} with a single namenode and datanode.
   *
   * @param dfsBaseDirs the base directory for both namenode and datanode. The dfs.name.dir and
   *        dfs.data.dir will be setup as dfsBaseDir/name* and dfsBaseDir/data* respectively
   */
  public LocalMiniDFSCluster(String dfsBaseDirs) {
    this(dfsBaseDirs, 1, 0);
  }

  /**
   * Creates a new {@link LocalMiniDFSCluster}.
   *
   * @param dfsBaseDirs the base directory for both namenode and datanode. The dfs.name.dir and
   *        dfs.data.dir will be setup as dfsBaseDir/name* and dfsBaseDir/data* respectively
   * @param numDataNode the number of datanodes
   */
  public LocalMiniDFSCluster(String dfsBaseDirs, int numDataNode) {
    this(dfsBaseDirs, numDataNode, 0);
  }

  /**
   * Creates a new {@link LocalMiniDFSCluster}.
   *
   * @param dfsBaseDirs the base directory for both namenode and datanode. The dfs.name.dir and
   *        dfs.data.dir will be setup as dfsBaseDir/name* and dfsBaseDir/data* respectively
   * @param numDataNode the number of datanodes
   * @param nameNodePort the port of namenode. If it is 0, the real namenode port can be retrieved
   *        by {@link #getNameNodePort()} after the cluster started
   */
  public LocalMiniDFSCluster(String dfsBaseDirs, int numDataNode, int nameNodePort) {
    super(dfsBaseDirs);
    mNamenodePort = nameNodePort;
    mNumDataNode = numDataNode;
  }

  /**
   * Creates a new {@link LocalMiniDFSCluster}.
   *
   * @param conf the base configuration to use in starting the servers. This will be modified as
   *        necessary.
   * @param dfsBaseDirs the base directory for both namenode and datanode. The dfs.name.dir and
   *        dfs.data.dir will be setup as dfsBaseDir/name* and dfsBaseDir/data* respectively
   * @param numDataNode the number of datanodes
   * @param nameNodePort the port of namenode. If it is 0, the real namenode port can be retrieved
   *        by {@link #getNameNodePort()} after the cluster started
   */
  public LocalMiniDFSCluster(org.apache.hadoop.conf.Configuration conf, String dfsBaseDirs,
      int numDataNode, int nameNodePort) {
    super(dfsBaseDirs);
    mConf = conf;
    mNamenodePort = nameNodePort;
    mNumDataNode = numDataNode;
  }

  /**
   * @return the {@link DistributedFileSystem} client connected to this cluster, or {@code null}
   *         if the cluster has not been started
   */
  public DistributedFileSystem getDFSClient() {
    return mDfsClient;
  }

  /**
   * Gets the specified or real namenode port.
   *
   * @return port of namenode
   */
  public int getNameNodePort() {
    return mNamenodePort;
  }

  /**
   * Gets the namenode address for this {@link LocalMiniDFSCluster}.
   *
   * @return namenode address, or {@code null} if the cluster has not been started
   */
  @Override
  public String getUnderFilesystemAddress() {
    if (mDfsClient != null) {
      return mDfsClient.getUri().toString();
    }
    return null;
  }

  @Override
  public boolean isStarted() {
    return mIsStarted;
  }

  /**
   * Shuts down the client and the cluster. A no-op if the cluster is not started.
   *
   * @throws IOException if closing the client fails; the cluster is still shut down in that case
   */
  @Override
  public void shutdown() throws IOException {
    LOG.info("Shutting down DFS cluster.");
    if (mIsStarted) {
      // Ensure the cluster is torn down and the started flag is cleared even if closing the
      // client throws; otherwise a failed close would leak the MiniDFSCluster's resources.
      try {
        mDfsClient.close();
      } finally {
        mDfsCluster.shutdown();
        mIsStarted = false;
      }
    }
  }

  /**
   * Starts the minidfscluster before using it. A no-op if the cluster is already started.
   *
   * @throws IOException if the base directory cannot be (re)created or the cluster fails to start
   */
  @Override
  public void start() throws IOException {
    LOG.info("Starting DFS cluster.");
    if (!mIsStarted) {
      // Start from a clean base directory so stale name/data dirs don't interfere.
      FileUtils.deletePathRecursively(mBaseDir);
      FileUtils.createDir(mBaseDir);
      // TODO(hy): For hadoop 1.x, there exists NPE while startDataNode. It is a known issue caused
      // by "umask 002" (should be 022) see [HDFS-2556]. So the following code only works for
      // hadoop 2.x or "umask 022".
      System.setProperty("test.build.data", mBaseDir);
      mDfsCluster = new MiniDFSCluster(mNamenodePort, mConf, mNumDataNode, true, true, null, null);
      mDfsCluster.waitClusterUp();
      if (mNamenodePort == 0) {
        // A port of 0 asks the namenode to bind an ephemeral port; record the real one.
        mNamenodePort = mDfsCluster.getNameNodePort();
      }
      // For HDFS of earlier versions, getFileSystem() returns an instance of type
      // {@link org.apache.hadoop.fs.FileSystem} rather than {@link DistributedFileSystem}
      mDfsClient = (DistributedFileSystem) mDfsCluster.getFileSystem();
      mIsStarted = true;
    }
  }

  /**
   * Deletes every entry directly under the cluster's root directory. A no-op if the cluster is
   * not started.
   *
   * @throws IOException if listing or deleting the root entries fails
   */
  @Override
  public void cleanup() throws IOException {
    if (!isStarted()) {
      return;
    }
    DistributedFileSystem client = getDFSClient();
    FileStatus[] files =
        client.listStatus(new Path(getUnderFilesystemAddress() + AlluxioURI.SEPARATOR));
    if (files == null) {
      return;
    }
    for (FileStatus status : files) {
      client.delete(status.getPath(), true);
    }
  }
}