/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.balancer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.NameNodeProxies;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.balancer.Balancer.Cli;
import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.util.Tool;
import org.junit.Test;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Random;
import java.util.concurrent.TimeoutException;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
/**
* This class tests if a balancer schedules tasks correctly.
*/
public class TestBalancer {
private static final Log LOG = LogFactory.getLog(TestBalancer.class);
static final long CAPACITY = 500L;
static final String RACK0 = "/rack0";
static final String RACK1 = "/rack1";
static final String RACK2 = "/rack2";
private static final String fileName = "/tmp.txt";
static final Path filePath = new Path(fileName);
private MiniDFSCluster cluster;
ClientProtocol client;
static final long TIMEOUT = 20000L; //msec
static final double CAPACITY_ALLOWED_VARIANCE = 0.005; // 0.5%
static final double BALANCE_ALLOWED_VARIANCE = 0.11; // 10%+delta
static final int DEFAULT_BLOCK_SIZE = 10;
private static final Random r = new Random();
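// Shorten the time the balancer waits for block moves to be confirmed,
// so that the tests complete quickly.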
static {
Balancer.setBlockMoveWaitTime(1000L);
}
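/**
* Use a tiny block size, frequent heartbeats, a short moved-blocks window,
* and the simulated FSDataset so that balancing converges quickly in tests.
*/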
static void initConf(Configuration conf) {
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DEFAULT_BLOCK_SIZE);
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, DEFAULT_BLOCK_SIZE);
conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1L);
SimulatedFSDataset.setFactory(conf);
conf.setLong(DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY, 2000L);
}
/* create a file with a length of <code>fileLen</code> */
static void createFile(MiniDFSCluster cluster, Path filePath, long fileLen,
short replicationFactor, int nnIndex)
throws IOException, InterruptedException, TimeoutException {
FileSystem fs = cluster.getFileSystem(nnIndex);
DFSTestUtil
.createFile(fs, filePath, fileLen, replicationFactor, r.nextLong());
DFSTestUtil.waitReplication(fs, filePath, replicationFactor);
}
/* Fill up a cluster with <code>numNodes</code> datanodes so that its
* total used space is <code>size</code>, and return the blocks of the
* file that was created.
*/
private ExtendedBlock[] generateBlocks(Configuration conf, long size,
short numNodes)
throws IOException, InterruptedException, TimeoutException {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numNodes).build();
try {
cluster.waitActive();
client = NameNodeProxies
.createProxy(conf, cluster.getFileSystem(0).getUri(),
ClientProtocol.class).getProxy();
short replicationFactor = (short) (numNodes - 1);
long fileLen = size / replicationFactor;
createFile(cluster, filePath, fileLen, replicationFactor, 0);
List<LocatedBlock> locatedBlocks = client.
getBlockLocations(fileName, 0, fileLen).getLocatedBlocks();
int numOfBlocks = locatedBlocks.size();
ExtendedBlock[] blocks = new ExtendedBlock[numOfBlocks];
for (int i = 0; i < numOfBlocks; i++) {
ExtendedBlock b = locatedBlocks.get(i).getBlock();
blocks[i] = new ExtendedBlock(b.getBlockPoolId(), b.getBlockId(),
b.getNumBytes(), b.getGenerationStamp());
}
return blocks;
} finally {
cluster.shutdown();
}
}
/* Distribute all blocks according to the given distribution */
static Block[][] distributeBlocks(ExtendedBlock[] blocks,
short replicationFactor, final long[] distribution) {
// make a copy
long[] usedSpace = new long[distribution.length];
System.arraycopy(distribution, 0, usedSpace, 0, distribution.length);
List<List<Block>> blockReports =
new ArrayList<List<Block>>(usedSpace.length);
Block[][] results = new Block[usedSpace.length][];
for (int i = 0; i < usedSpace.length; i++) {
blockReports.add(new ArrayList<Block>());
}
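// Place each replica on a randomly chosen datanode that still has target
// used space remaining, charging the block's size against that node.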
for (int i = 0; i < blocks.length; i++) {
for (int j = 0; j < replicationFactor; j++) {
boolean notChosen = true;
while (notChosen) {
int chosenIndex = r.nextInt(usedSpace.length);
if (usedSpace[chosenIndex] > 0) {
notChosen = false;
blockReports.get(chosenIndex).add(blocks[i].getLocalBlock());
usedSpace[chosenIndex] -= blocks[i].getNumBytes();
}
}
}
}
for (int i = 0; i < usedSpace.length; i++) {
List<Block> nodeBlockList = blockReports.get(i);
results[i] = nodeBlockList.toArray(new Block[nodeBlockList.size()]);
}
return results;
}
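/** Sum of all entries of <code>x</code>. */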
static long sum(long[] x) {
long s = 0L;
for (long a : x) {
s += a;
}
return s;
}
/* First start a cluster and fill it up to a certain size.
* Then redistribute the blocks according to the required distribution.
* Finally run the balancer and verify that the cluster becomes balanced.
*/
private void testUnevenDistribution(Configuration conf, long distribution[],
long capacities[], String[] racks) throws Exception {
int numDatanodes = distribution.length;
if (capacities.length != numDatanodes || racks.length != numDatanodes) {
throw new IllegalArgumentException("Array length is not the same");
}
// calculate total space that need to be filled
final long totalUsedSpace = sum(distribution);
// fill the cluster
ExtendedBlock[] blocks =
generateBlocks(conf, totalUsedSpace, (short) numDatanodes);
// redistribute blocks
Block[][] blocksDN =
distributeBlocks(blocks, (short) (numDatanodes - 1), distribution);
// restart the cluster: do NOT format the cluster
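// set the safemode threshold to 0 so the restarted NameNode leaves safemode
// without waiting for block reports; blocks are injected below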
conf.set(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, "0.0f");
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes)
.format(false).racks(racks).simulatedCapacities(capacities).build();
cluster.waitActive();
client = NameNodeProxies
.createProxy(conf, cluster.getFileSystem(0).getUri(),
ClientProtocol.class).getProxy();
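// inject the redistributed block lists directly into the simulated datanodes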
for (int i = 0; i < blocksDN.length; i++) {
cluster.injectBlocks(i, Arrays.asList(blocksDN[i]));
}
final long totalCapacity = sum(capacities);
runBalancer(conf, totalUsedSpace, totalCapacity);
cluster.shutdown();
}
/**
* Wait until the heartbeat statistics, summed over all nodes, are within
* CAPACITY_ALLOWED_VARIANCE of the expected values. Times out after
* TIMEOUT msec.
*
* @param expectedUsedSpace expected total used space across the cluster
* @param expectedTotalSpace expected total capacity of the cluster
* @throws IOException if getStats() fails
* @throws TimeoutException if the expected values are not reached in time
*/
static void waitForHeartBeat(long expectedUsedSpace, long expectedTotalSpace,
ClientProtocol client, MiniDFSCluster cluster)
throws IOException, TimeoutException {
long timeout = TIMEOUT;
long failtime = (timeout <= 0L) ? Long.MAX_VALUE : Time.now() + timeout;
while (true) {
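// getStats() returns [capacity, used, remaining, ...]; compare the first
// two entries against the expected totals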
long[] status = client.getStats();
double totalSpaceVariance =
Math.abs((double) status[0] - expectedTotalSpace) /
expectedTotalSpace;
double usedSpaceVariance =
Math.abs((double) status[1] - expectedUsedSpace) / expectedUsedSpace;
if (totalSpaceVariance < CAPACITY_ALLOWED_VARIANCE &&
usedSpaceVariance < CAPACITY_ALLOWED_VARIANCE) {
break; //done
}
if (Time.now() > failtime) {
throw new TimeoutException(
"Cluster failed to reached expected values of " +
"totalSpace (current: " + status[0] + ", expected: " +
expectedTotalSpace + "), or usedSpace (current: " + status[1] +
", expected: " + expectedUsedSpace + "), in more than " +
timeout + " msec.");
}
try {
Thread.sleep(100L);
} catch (InterruptedException ignored) {
}
}
}
/**
* Wait until the cluster is balanced: every datanode reports a utilization
* within BALANCE_ALLOWED_VARIANCE of the cluster average.
*
* @throws IOException if the datanode report cannot be retrieved
* @throws TimeoutException if the cluster does not balance within TIMEOUT msec
*/
static void waitForBalancer(long totalUsedSpace, long totalCapacity,
ClientProtocol client, MiniDFSCluster cluster)
throws IOException, TimeoutException {
long timeout = TIMEOUT;
long failtime = (timeout <= 0L) ? Long.MAX_VALUE : Time.now() + timeout;
final double avgUtilization = ((double) totalUsedSpace) / totalCapacity;
boolean balanced;
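// poll the datanode report until every node's utilization is within
// BALANCE_ALLOWED_VARIANCE of the cluster average, or until the timeout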
do {
DatanodeInfo[] datanodeReport =
client.getDatanodeReport(DatanodeReportType.ALL);
assertEquals(cluster.getDataNodes().size(), datanodeReport.length);
balanced = true;
for (DatanodeInfo datanode : datanodeReport) {
double nodeUtilization =
((double) datanode.getDfsUsed()) / datanode.getCapacity();
if (Math.abs(avgUtilization - nodeUtilization) >
BALANCE_ALLOWED_VARIANCE) {
balanced = false;
if (Time.now() > failtime) {
throw new TimeoutException(
"Rebalancing expected avg utilization to become " +
avgUtilization + ", but on datanode " + datanode +
" it remains at " + nodeUtilization + " after more than " +
TIMEOUT + " msec.");
}
try {
Thread.sleep(100);
} catch (InterruptedException ignored) {
}
break;
}
}
} while (!balanced);
}
/**
* This test starts a cluster with the specified number of nodes and fills
* it to 30% full (with a single file replicated identically to all
* datanodes). It then adds one new empty node and runs the balancer.
*
* @param conf
* - configuration
* @param capacities
* - array of capacities of original nodes in cluster
* @param racks
* - array of racks for original nodes in cluster
* @param newCapacity
* - new node's capacity
* @param newRack
* - new node's rack
* @param useTool
* - if true run test via Cli with command-line argument
* parsing, etc. Otherwise invoke balancer API directly.
* @throws Exception
*/
private void doTest(Configuration conf, long[] capacities, String[] racks,
long newCapacity, String newRack, boolean useTool) throws Exception {
assertEquals(capacities.length, racks.length);
int numOfDatanodes = capacities.length;
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(capacities.length)
.racks(racks).simulatedCapacities(capacities).build();
try {
cluster.waitActive();
client = NameNodeProxies
.createProxy(conf, cluster.getFileSystem(0).getUri(),
ClientProtocol.class).getProxy();
long totalCapacity = sum(capacities);
// fill up the cluster to be 30% full
long totalUsedSpace = totalCapacity * 3 / 10;
createFile(cluster, filePath, totalUsedSpace / numOfDatanodes,
(short) numOfDatanodes, 0);
// start up an empty node with the given capacity on the given rack
cluster.startDataNodes(conf, 1, true, null, new String[]{newRack},
new long[]{newCapacity});
totalCapacity += newCapacity;
// run balancer and validate results
if (useTool) {
runBalancerCli(conf, totalUsedSpace, totalCapacity);
} else {
runBalancer(conf, totalUsedSpace, totalCapacity);
}
} finally {
cluster.shutdown();
}
}
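/**
* Run the balancer through its Java API and wait until the cluster
* reports a balanced distribution.
*/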
private void runBalancer(Configuration conf, long totalUsedSpace,
long totalCapacity) throws Exception {
waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster);
// start rebalancing
Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, conf);
assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster);
LOG.info("Rebalancing with default ctor.");
waitForBalancer(totalUsedSpace, totalCapacity, client, cluster);
}
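/**
* Run the balancer through its command-line front end (Balancer.Cli) and
* wait until the cluster reports a balanced distribution.
*/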
private void runBalancerCli(Configuration conf, long totalUsedSpace,
long totalCapacity) throws Exception {
waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster);
final String[] args = {"-policy", "datanode"};
final Tool tool = new Cli();
tool.setConf(conf);
final int r = tool.run(args); // start rebalancing
assertEquals("Tools should exit 0 on success", 0, r);
waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster);
LOG.info("Rebalancing with default ctor.");
waitForBalancer(totalUsedSpace, totalCapacity, client, cluster);
}
/**
* one-node cluster test
*/
private void oneNodeTest(Configuration conf, boolean useTool)
throws Exception {
// add an empty node with half of CAPACITY, on the same rack
doTest(conf, new long[]{CAPACITY}, new String[]{RACK0}, CAPACITY / 2, RACK0,
useTool);
}
/**
* two-node cluster test
*/
private void twoNodeTest(Configuration conf) throws Exception {
doTest(conf, new long[]{CAPACITY, CAPACITY}, new String[]{RACK0, RACK1},
CAPACITY, RACK2, false);
}
/**
* test using a user-supplied conf
*/
public void integrationTest(Configuration conf) throws Exception {
initConf(conf);
oneNodeTest(conf, false);
}
/**
* Test parse method in Balancer#Cli class with threshold value out of
* boundaries.
*/
@Test(timeout = 100000)
public void testBalancerCliParseWithThresholdOutOfBoundaries() {
String parameters[] = new String[]{"-threshold", "0"};
String reason =
"IllegalArgumentException is expected when threshold value" +
" is out of boundary.";
try {
Balancer.Cli.parse(parameters);
fail(reason);
} catch (IllegalArgumentException e) {
assertEquals("Number out of range: threshold = 0.0", e.getMessage());
}
parameters = new String[]{"-threshold", "101"};
try {
Balancer.Cli.parse(parameters);
fail(reason);
} catch (IllegalArgumentException e) {
assertEquals("Number out of range: threshold = 101.0", e.getMessage());
}
}
/**
* Test a cluster with an even distribution, then add a new empty node
* to the cluster.
*/
@Test(timeout = 100000)
public void testBalancer0() throws Exception {
testBalancer0Internal(new HdfsConfiguration());
}
void testBalancer0Internal(Configuration conf) throws Exception {
initConf(conf);
oneNodeTest(conf, false);
twoNodeTest(conf);
}
/**
* Test unevenly distributed cluster
*/
@Test(timeout = 100000)
public void testBalancer1() throws Exception {
testBalancer1Internal(new HdfsConfiguration());
}
void testBalancer1Internal(Configuration conf) throws Exception {
initConf(conf);
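// node 0 starts at 50% utilization and node 1 at 10%, with equal capacities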
testUnevenDistribution(conf,
new long[]{50 * CAPACITY / 100, 10 * CAPACITY / 100},
new long[]{CAPACITY, CAPACITY}, new String[]{RACK0, RACK1});
}
@Test(timeout = 100000)
public void testBalancer2() throws Exception {
testBalancer2Internal(new HdfsConfiguration());
}
void testBalancer2Internal(Configuration conf) throws Exception {
initConf(conf);
testBalancerDefaultConstructor(conf, new long[]{CAPACITY, CAPACITY},
new String[]{RACK0, RACK1}, CAPACITY, RACK2);
}
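/**
* Same scenario as {@link #doTest}: fill the cluster to 30% full, add one
* empty node, then run the balancer directly through its Java API.
*/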
private void testBalancerDefaultConstructor(Configuration conf,
long[] capacities, String[] racks, long newCapacity, String newRack)
throws Exception {
int numOfDatanodes = capacities.length;
assertEquals(numOfDatanodes, racks.length);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(capacities.length)
.racks(racks).simulatedCapacities(capacities).build();
try {
cluster.waitActive();
client = NameNodeProxies
.createProxy(conf, cluster.getFileSystem(0).getUri(),
ClientProtocol.class).getProxy();
long totalCapacity = sum(capacities);
// fill up the cluster to be 30% full
long totalUsedSpace = totalCapacity * 3 / 10;
createFile(cluster, filePath, totalUsedSpace / numOfDatanodes,
(short) numOfDatanodes, 0);
// start up an empty node with the given capacity on the given rack
cluster.startDataNodes(conf, 1, true, null, new String[]{newRack},
new long[]{newCapacity});
totalCapacity += newCapacity;
// run balancer and validate results
runBalancer(conf, totalUsedSpace, totalCapacity);
} finally {
cluster.shutdown();
}
}
/**
* Verify balancer exits 0 on success.
*/
@Test(timeout = 100000)
public void testExitZeroOnSuccess() throws Exception {
final Configuration conf = new HdfsConfiguration();
initConf(conf);
oneNodeTest(conf, true);
}
/**
* Run the core balancer tests without a JUnit runner.
*
* @param args ignored
*/
public static void main(String[] args) throws Exception {
TestBalancer balancerTest = new TestBalancer();
balancerTest.testBalancer0();
balancerTest.testBalancer1();
balancerTest.testBalancer2();
}
}