/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.net.URI;
import java.security.SecureRandom;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.test.system.HDFSCluster;
import org.apache.hadoop.hdfs.test.system.NNClient;
import org.apache.hadoop.hdfs.test.system.DNClient;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mortbay.util.ajax.JSON;

public class TestBalancer {

  private static final Log LOG = LogFactory.getLog(TestBalancer.class);
  private static final String BALANCER_TEMP_DIR = "balancer-temp";
  private Configuration hadoopConf;
  private HDFSCluster dfsCluster;

  public TestBalancer() throws Exception {
  }

  @Before
  public void setUp() throws Exception {
    hadoopConf = new Configuration();
    dfsCluster = HDFSCluster.createCluster(hadoopConf);
    dfsCluster.setUp();
  }

  @After
  public void tearDown() throws Exception {
    dfsCluster.tearDown();
  }

  // Trivial
  @Test
  public void testNamenodePing() throws IOException {
    LOG.info("testing filesystem ping");
    NNClient namenode = dfsCluster.getNNClient();
    namenode.ping();
    LOG.info("done.");
  }

  // Trivial
  @Test
  public void testNamenodeConnectDisconnect() throws IOException {
    LOG.info("connecting to namenode");
    NNClient namenode = dfsCluster.getNNClient();
    namenode.connect();
    LOG.info("done.");
    LOG.info("disconnecting from namenode");
    namenode.disconnect();
  }

  /**
   * The basic scenario for the balancer test is as follows:
   *
   *  - Bring up cluster with 1 DataNode
   *  - Load DataNode to >50%
   *  - Count files/blocks on DataNode
   *  - Add new, empty DataNode to cluster
   *  - Run Balancer
   *  - Count files/blocks on DataNodes
   *  - Block counts from before and after the Balancer run should be consistent
   */
  @Test
  public void testBalancerBasicScenario() throws IOException {
    Path balancerTempDir = null;
    try {
      List<DNClient> testnodes = reserveDatanodesForTest(2);
      DNClient testnode1 = testnodes.get(0);
      DNClient testnode2 = testnodes.get(1);
      shutdownNonTestNodes(testnodes);
LOG.info("attempting to kill both test nodes"); stopDatanode(testnode1); stopDatanode(testnode2); LOG.info("starting up datanode ["+ testnode1.getHostName()+ "] and loading it with data"); startDatanode(testnode1); // mkdir balancer-temp balancerTempDir = makeTempDir(); // write 2 blocks to file system LOG.info("generating filesystem load"); // TODO spec blocks to generate by blockCount, blockSize, # of writers generateFileSystemLoad(2); // generate 2 blocks of test data LOG.info("measure space used on 1st node"); long usedSpace0 = getDatanodeUsedSpace(testnode1); LOG.info("datanode " + testnode1.getHostName() + " contains " + usedSpace0 + " bytes"); LOG.info("bring up a 2nd node and run balancer on DFS"); startDatanode(testnode2); runBalancerAndVerify(testnodes); } catch (Throwable t) { LOG.info("method testBalancer failed", t); } finally { // finally block to run cleanup LOG.info("clean off test data from DFS [rmr ~/balancer-temp]"); try { deleteTempDir(balancerTempDir); } catch (Exception e) { LOG.warn("problem cleaning up temp dir", e); } // restart killed nodes Iterator<DNClient> iter = dfsCluster.getDNClients().iterator(); while (iter.hasNext()) { DNClient dn = iter.next(); startDatanode( dn ); } } } private void shutdownNonTestNodes(List<DNClient> testnodes) { Set killSet = new HashSet(getAllDatanodes()); killSet.removeAll(testnodes); LOG.info("attempting to kill/suspend all the nodes not used for this test"); Iterator<DNClient> iter = killSet.iterator(); DNClient dn = null; while (iter.hasNext()) { dn = iter.next(); // kill may not work with some secure-HDFS configs, // so using our stopDataNode() method stopDatanode(dn); } } /** * Kill all datanodes but leave reservationCount nodes alive, * return a list of the reserved datanodes */ private List<DNClient> reserveDatanodesForTest(int reservationCount) { List<DNClient> testDNs = new LinkedList<DNClient>(); List<DNClient> dieDNs = new LinkedList<DNClient>(); LOG.info("getting collection of live data nodes"); List<DNClient> dnList = getAllDatanodes(); int dnCount = dnList.size(); // check to make sure there is enough capacity on these nodes to run test Assert.assertTrue( String.format( "not enough datanodes available to run test," + " need %d datanodes but have only %d available", reservationCount, dnCount), ( dnCount >= reservationCount )); LOG.info("selecting "+reservationCount+" nodes for test"); dieDNs = new LinkedList<DNClient>(dnList); testDNs = new LinkedList<DNClient>(); final int LEN = dnCount - 1; int i = getRandom(LEN); DNClient testDN = dieDNs.get(i); testDNs.add(testDN); dieDNs.remove(testDN); int j = i; do { i = getRandom(LEN); } while (i != j); testDN = dieDNs.get(i); testDNs.add(testDN); dieDNs.remove(testDN); LOG.info("nodes reserved for test"); printDatanodeList(testDNs); LOG.info("nodes not used in test"); printDatanodeList(dieDNs); return testDNs; } private List<DNClient> getAllDatanodes() { return dfsCluster.getDNClients(); } private final static DNClient[] DATANODE_ARRAY = {}; private DNClient[] toDatanodeArray(List<DNClient> datanodeList) { return (DNClient[]) datanodeList.toArray(DATANODE_ARRAY); } /** * Return a random number between 0 and N inclusive. 
  /**
   * Return a random integer in the range [0, n), i.e. 0 inclusive to n exclusive.
   *
   * @param n upper bound (exclusive) of the number to return
   * @return random integer between 0 (inclusive) and n (exclusive)
   */
  private int getRandom(int n) {
    return (int) (n * Math.random());
  }

  /**
   * Calculate whether the difference between expected and observed values
   * is within tolerance.
   *
   * @param expectedValue expected value of experiment
   * @param observedValue observed value of experiment
   * @param tolerance percent tolerance for error, represented as an int
   */
  private boolean withinTolerance(long expectedValue, long observedValue,
      int tolerance) {
    double diff = 1.0 * Math.abs(observedValue - expectedValue);
    // floating-point division; integer division would truncate the threshold
    // to zero for any tolerance below 100
    double thrs = expectedValue * (tolerance / 100.0);
    // within tolerance when the deviation does not exceed the threshold
    return diff <= thrs;
  }

  // emulate tolerance calculation in balancer code
  public final static int DEFAULT_TOLERANCE = 10; // 10%

  protected boolean isClusterBalanced(DNClient[] datanodes) throws IOException {
    return isClusterBalanced(datanodes, DEFAULT_TOLERANCE);
  }

  protected boolean isClusterBalanced(DNClient[] datanodes, int tolerance)
      throws IOException {
    Assert.assertFalse("empty datanode array specified",
        ArrayUtils.isEmpty(datanodes));
    boolean result = true;
    double[] utilizationByNode = new double[datanodes.length];
    double totalUsedSpace = 0L;
    double totalCapacity = 0L;
    Map datanodeVolumeMap = new HashMap();
    // accumulate space stored on each node
    for (int i = 0; i < datanodes.length; i++) {
      DNClient datanode = datanodes[i];
      Map volumeInfoMap = getDatanodeVolumeAttributes(datanode);
      long usedSpace = (Long) volumeInfoMap.get(ATTRNAME_USED_SPACE);
      long capacity = (Long) volumeInfoMap.get(ATTRNAME_CAPACITY);
      utilizationByNode[i] = (((double) usedSpace) / capacity) * 100;
      totalUsedSpace += usedSpace;
      totalCapacity += capacity;
    }
    // here we are reusing previously fetched volume-info, for speed
    // an alternative is to get fresh values from the cluster here instead
    double avgUtilization = (totalUsedSpace / totalCapacity) * 100;
    for (int i = 0; i < datanodes.length; i++) {
      double varUtilization = Math.abs(avgUtilization - utilizationByNode[i]);
      if (varUtilization > tolerance) {
        result = false;
        break;
      }
    }
    return result;
  }
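  /*
   * Hedged, self-contained illustration of the criterion implemented above;
   * it mirrors the arithmetic in isClusterBalanced() without touching a live
   * cluster. Two nodes at 60% and 40% utilization average to 50%, so each
   * deviates by exactly 10 points, which the default 10% tolerance still
   * accepts because the check above uses a strict "greater than".
   */
  @Test
  public void testBalanceCriterionArithmeticIllustration() {
    double[] utilization = { 60.0, 40.0 };
    double avgUtilization = (utilization[0] + utilization[1]) / 2; // 50.0
    for (double u : utilization) {
      Assert.assertFalse("deviation should not exceed the default tolerance",
          Math.abs(avgUtilization - u) > DEFAULT_TOLERANCE);
    }
  }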
-> " + srcFs.exists(temp)); srcFs.delete(temp, true); } private void printDatanodeList(List<DNClient> lis) { for (DNClient datanode : lis) { LOG.info("\t" + datanode.getHostName()); } } private final static String CMD_STOP_DN = "sudo yinst stop hadoop_datanode_admin"; private void stopDatanode(DNClient dn) { String dnHost = dn.getHostName(); runAndWatch(dnHost, CMD_STOP_DN); } private final static String CMD_START_DN = "sudo yinst start hadoop_datanode_admin"; private void startDatanode(DNClient dn) { String dnHost = dn.getHostName(); runAndWatch(dnHost, CMD_START_DN); } /* using "old" default block size of 64M */ private static final int DFS_BLOCK_SIZE = 67108864; private static final short DEFAULT_REPLICATION = 3; private void generateFileSystemLoad(long numBlocks) { generateFileSystemLoad(numBlocks, DEFAULT_REPLICATION); } private void generateFileSystemLoad(long numBlocks, short replication) { String destfile = "hdfs:///user/hadoopqa/";// + BALANCER_TEMP_DIR + "/LOADGEN.DAT"; SecureRandom randgen = new SecureRandom(); ByteArrayOutputStream dat = null; ByteArrayInputStream in = null; final int CHUNK = 4096; final Configuration testConf = new Configuration(hadoopConf); try { testConf.setInt("dfs.replication", replication); for (int i = 0; i < numBlocks; i++) { FileSystem fs = FileSystem.get( URI.create(destfile), testConf); OutputStream out = fs.create( new Path(destfile), replication, new ProgressReporter()); dat = new ByteArrayOutputStream(DFS_BLOCK_SIZE); for (int z = 0; z < DFS_BLOCK_SIZE; z += CHUNK) { byte[] bytes = new byte[CHUNK]; randgen.nextBytes(bytes); dat.write(bytes, 0, CHUNK); } in = new ByteArrayInputStream(dat.toByteArray()); IOUtils.copyBytes(in, out, CHUNK, true); LOG.info("wrote block " + (i + 1) + " of " + numBlocks); } } catch (IOException ioExc) { LOG.warn("f/s loadgen failed!", ioExc); } finally { try { dat.close(); } catch (Exception e) { } try { in.close(); } catch (Exception e) { } } } // TODO this should be taken from the environment public final static String HADOOP_HOME = "/grid/0/gs/gridre/yroot.biga/share/hadoop-current"; public final static String CMD_SSH = "/usr/bin/ssh"; public final static String CMD_KINIT = "/usr/kerberos/bin/kinit"; public final static String CMD_HADOOP = HADOOP_HOME + "/bin/hadoop"; public final static String OPT_BALANCER = "balancer"; public final static String KERB_KEYTAB = "/homes/hadoopqa/hadoopqa.dev.headless.keytab"; public final static String KERB_PRINCIPAL = "hadoopqa@DEV.YGRID.YAHOO.COM"; public final static int DEFAULT_THRESHOLD = 10; private int runBalancer() throws IOException { return runBalancer(DEFAULT_THRESHOLD); } private int runBalancer(int threshold) throws IOException { return runBalancer(""+threshold); } /* * TODO change the heap size balancer uses so it can run on gateways * i.e., 14G heap is too big for gateways */ private int runBalancer(String threshold) throws IOException { String balancerCommand = String.format("\"%s -k -t %s %s; %s %s -threshold %s", CMD_KINIT, KERB_KEYTAB, KERB_PRINCIPAL, CMD_HADOOP, OPT_BALANCER, threshold); String nnHost = dfsCluster.getNNClient().getHostName(); return runAndWatch(nnHost, balancerCommand); } private void runBalancerAndVerify(List<DNClient> testnodes) throws IOException { runBalancerAndVerify(testnodes, DEFAULT_THRESHOLD); } private void runBalancerAndVerify(List<DNClient> testnodes, int threshold) throws IOException { runBalancerAndVerify(testnodes, ""+DEFAULT_THRESHOLD); } private void runBalancerAndVerify(List<DNClient> testnodes, String threshold) throws IOException 
  private void runBalancerAndVerify(List<DNClient> testnodes, String threshold)
      throws IOException {
    int exitStatus = runBalancer(threshold);
    // assert balancer exits with status SUCCESS
    Assert.assertTrue(
        String.format("balancer returned non-success exit code: %d", exitStatus),
        (exitStatus == SUCCESS));
    DNClient[] testnodeArr = toDatanodeArray(testnodes);
    Assert.assertTrue("cluster is not balanced", isClusterBalanced(testnodeArr));
  }

  private int runAndWatch(String remoteHost, String remoteCommand) {
    int exitStatus = -1;
    try {
      Process proc =
          new ProcessBuilder(CMD_SSH, remoteHost, remoteCommand).start();
      watchProcStream(proc.getInputStream(), System.out);
      watchProcStream(proc.getErrorStream(), System.err);
      exitStatus = proc.waitFor();
    } catch (InterruptedException intExc) {
      LOG.warn("got thread interrupt error", intExc);
    } catch (IOException ioExc) {
      LOG.warn("got i/o error", ioExc);
    }
    return exitStatus;
  }

  private void watchProcStream(InputStream in, PrintStream out) {
    new Thread(new StreamWatcher(in, out)).start();
  }

  private static final String DATANODE_VOLUME_INFO = "VolumeInfo";
  private static final String ATTRNAME_USED_SPACE = "usedSpace";
  private static final String ATTRNAME_FREE_SPACE = "freeSpace";
  // pseudo attribute, JMX doesn't really provide this
  private static final String ATTRNAME_CAPACITY = "capacity";

  // TODO maybe the static methods below belong in some utility class...
  private static long getDatanodeUsedSpace(DNClient datanode)
      throws IOException {
    return (Long) getDatanodeVolumeAttributes(datanode).get(ATTRNAME_USED_SPACE);
  }
  /*
  private static long getDatanodeFreeSpace(DNClient datanode)
      throws IOException {
    return (Long) getDatanodeVolumeAttributes(datanode).get(ATTRNAME_FREE_SPACE);
  }
  */

  private static Map getDatanodeVolumeAttributes(DNClient datanode)
      throws IOException {
    Map result = new HashMap();
    long usedSpace = getVolumeAttribute(datanode, ATTRNAME_USED_SPACE);
    long freeSpace = getVolumeAttribute(datanode, ATTRNAME_FREE_SPACE);
    result.put(ATTRNAME_USED_SPACE, usedSpace);
    result.put(ATTRNAME_CAPACITY, usedSpace + freeSpace);
    return result;
  }

  private static long getVolumeAttribute(DNClient datanode, String attribName)
      throws IOException {
    Object volInfo = datanode.getDaemonAttribute(DATANODE_VOLUME_INFO);
    Assert.assertNotNull(
        String.format("Attribute \"%s\" should be non-null", DATANODE_VOLUME_INFO),
        volInfo);
    String strVolInfo = volInfo.toString();
    LOG.debug(String.format("Value of %s: %s", DATANODE_VOLUME_INFO, strVolInfo));
    // VolumeInfo is a JSON map keyed by volume directory; each value is a map
    // of per-volume attributes (e.g. usedSpace, freeSpace) summed up here
    Map volInfoMap = (Map) JSON.parse(strVolInfo);
    long attrVal = 0L;
    for (Object key : volInfoMap.keySet()) {
      Map attrMap = (Map) volInfoMap.get(key);
      long val = (Long) attrMap.get(attribName);
      attrVal += val;
    }
    return attrVal;
  }

  /** simple utility to watch streams from an exec'ed process */
  static class StreamWatcher implements Runnable {

    private BufferedReader reader;
    private PrintStream printer;

    StreamWatcher(InputStream in, PrintStream out) {
      reader = getReader(in);
      printer = out;
    }

    private static BufferedReader getReader(InputStream in) {
      return new BufferedReader(new InputStreamReader(in));
    }

    public void run() {
      try {
        // note: echoes at most one line, and only if a line is ready when
        // the watcher thread gets to run
        if (reader.ready()) {
          printer.println(reader.readLine());
        }
      } catch (IOException ioExc) {
      }
    }
  }

  /** simple utility to report progress in generating data */
  static class ProgressReporter implements Progressable {

    StringBuffer buf = null;

    public void progress() {
      if (buf == null) {
        buf = new StringBuffer();
      }
      buf.append(".");
      if (buf.length() == 10000) {
        LOG.info("..........");
        buf = null;
      }
    }
  }

  // A constant for SUCCESS exit code
  static final int SUCCESS = 1;
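  /*
   * Hedged convenience sketch: the same "exit code equals SUCCESS" assertion
   * appears in runBalancerAndVerify() above and in testBalancerSimple() below;
   * factoring it out would look roughly like this. It is not wired into the
   * existing tests here.
   */
  private static void assertBalancerSuccess(int exitStatus) {
    Assert.assertTrue(
        String.format("balancer returned non-success exit code: %d", exitStatus),
        (exitStatus == SUCCESS));
  }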
  /**
   * Balancer_01
   * Start balancer and check if the cluster is balanced after the run.
   * Cluster should end up in balanced state.
   */
  @Test
  public void testBalancerSimple() throws IOException {
    DNClient[] datanodes = toDatanodeArray(getAllDatanodes());
    int exitStatus = runBalancer();
    // assert on successful exit code here
    Assert.assertTrue(
        String.format("balancer returned non-success exit code: %d", exitStatus),
        (exitStatus == SUCCESS));
    Assert.assertTrue("cluster is not balanced", isClusterBalanced(datanodes));
  }

  /**
   * Balancer_02
   * Test a cluster with even distribution, then a new empty node is
   * added to the cluster. Here, even distribution effectively means the
   * cluster is in "balanced" state, as bytes consumed for block allocation
   * are evenly distributed throughout the cluster.
   */
  @Test
  public void testBalancerEvenDistributionWithNewNodeAdded() throws IOException {
    throw new UnsupportedOperationException("not implemented yet!");
    // get all nodes
    // need to get an external reserve of nodes we can boot up
    // to add to this cluster?
    // HOW?
    // IDEA try to steal some nodes from omega-M for now.....
    // hmmm also need a way to give an alternate "empty-node" config
    // to "hide" the data that may already exist on this node
  }

  /**
   * Balancer_03
   * Bring up a 1-node DFS cluster. Set files replication factor to be 1
   * and fill up the node to 30% full. Then add an empty datanode.
   */
  @Test
  public void testBalancerSingleNodeClusterWithNewNodeAdded() throws IOException {
    // empty datanode: mod config to point to non-default blocks dir.
    // limit capacity to available storage space
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_04
   * The same as _03 except that the empty new data node is on a
   * different rack.
   */
  @Test
  public void testBalancerSingleNodeClusterWithNewNodeAddedFromDifferentRack()
      throws IOException {
    // need rack awareness
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_05
   * The same as _03 except that the empty new data node is half the
   * capacity of the old one.
   */
  @Test
  public void testBalancerSingleNodeClusterWithHalfCapacityNewNode() {
    // how to limit node capacity? (see the capacity-limiting sketch below)
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_06
   * Bring up a 2-node cluster and fill one node to be 60% and the
   * other to be 10% full. All nodes are on different racks.
   */
  @Test
  public void testBalancerTwoNodeMultiRackCluster() {
    // need rack awareness
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_07
   * Bring up a dfs cluster with nodes A and B. Set file replication
   * factor to be 2 and fill up the cluster to 30% full. Then add an
   * empty data node C. All three nodes are on the same rack.
   */
  @Test
  public void testBalancerTwoNodeSingleRackClusterWithNewNodeAdded()
      throws IOException {
    // TODO the scenario above calls for replication factor 2;
    // reconcile with the factor of 3 used here
    final short TEST_REPLICATION_FACTOR = 3;
    List<DNClient> testnodes = reserveDatanodesForTest(3);
    DNClient dnA = testnodes.get(0);
    DNClient dnB = testnodes.get(1);
    DNClient dnC = testnodes.get(2);
    stopDatanode(dnC);
    // change test: 30% full-er (ie, 30% over pre-test capacity),
    // use the most heavily loaded node as baseline
    // note: 1.0 (not 1) so the block-size division is not integer division,
    // which would always give a target load of zero blocks
    long targetLoad = (long) ((1.0 / DFS_BLOCK_SIZE) * 0.30
        * Math.max(getDatanodeUsedSpace(dnA), getDatanodeUsedSpace(dnB)));
    generateFileSystemLoad(targetLoad, TEST_REPLICATION_FACTOR);
    startDatanode(dnC);
    runBalancerAndVerify(testnodes);
  }
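  /*
   * Hedged sketch for the capacity-related scenarios in this class
   * (Balancer_05 above and Balancer_12..14 below): one way to make a datanode
   * appear to have reduced capacity is to raise "dfs.datanode.du.reserved"
   * (the standard HDFS setting for space the datanode must leave unused) in
   * the configuration it is restarted with. How that configuration reaches
   * the remote node depends on the system-test deployment and is not shown.
   */
  private Configuration makeReducedCapacityConf(long reservedBytesPerVolume) {
    Configuration conf = new Configuration(hadoopConf);
    conf.setLong("dfs.datanode.du.reserved", reservedBytesPerVolume);
    return conf;
  }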
  /**
   * Balancer_08
   * The same as _07 except that A, B and C are on different racks.
   */
  @Test
  public void testBalancerTwoNodeMultiRackClusterWithNewNodeAdded()
      throws IOException {
    // need rack awareness
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_09
   * The same as _07 except that balancing is interrupted.
   */
  @Test
  public void testBalancerTwoNodeSingleRackClusterInterruptingRebalance()
      throws IOException {
    // interrupt thread
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_10
   * Restart rebalancing until it is done.
   */
  @Test
  public void testBalancerRestartInterruptedBalancerUntilDone()
      throws IOException {
    // need kill-restart thread
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_11
   * The same as _07 except that the namenode is shutdown while rebalancing.
   */
  @Test
  public void testBalancerTwoNodeSingleRackShutdownNameNodeDuringRebalance()
      throws IOException {
    // need NN shutdown thread in addition
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_12
   * The same as _05 except that FS writes occur during rebalancing.
   */
  @Test
  public void testBalancerSingleNodeClusterWithHalfCapacityNewNodeRebalanceWithConcurrentFSWrites()
      throws IOException {
    // writer thread (see the background-writer sketch below)
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_13
   * The same as _05 except that FS deletes occur during rebalancing.
   */
  @Test
  public void testBalancerSingleNodeClusterWithHalfCapacityNewNodeRebalanceWithConcurrentFSDeletes()
      throws IOException {
    // eraser thread
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_14
   * The same as _05 except that FS deletes AND writes occur during
   * rebalancing.
   */
  @Test
  public void testBalancerSingleNodeClusterWithHalfCapacityNewNodeRebalanceWithConcurrentFSDeletesAndWrites()
      throws IOException {
    // writer & eraser threads
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_15
   * Scalability test: Populate a 750-node cluster, then
   *  1. Run rebalancing after 3 nodes are added
   *  2. Run rebalancing after 2 racks of nodes (60 nodes) are added
   *  3. Run rebalancing after 2 racks of nodes are added and concurrently
   *     executing file writing and deleting at the same time
   */
  @Test
  public void testBalancerScalability() throws IOException {
    /* work in progress->
     *
     * List<DNClient> dnList = getAllDatanodes();
     * int dnCount = dnList.size();
     *
     * Assert.assertTrue(
     *     String.format(
     *         "not enough datanodes available to run test,"
     *             + " need 2 datanodes but have only %d available",
     *         dnCount),
     *     ( dnCount == (875 - 2) ));
     * List<DNClient> datanodes = reserveDatanodesForTest(750);
     * shutdownNonTestNodes(datanodes);
     */
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_16
   * Start balancer with a negative threshold value.
   */
  @Test
  public void testBalancerConfiguredWithThresholdValueNegative()
      throws IOException {
    List<DNClient> testnodes = getAllDatanodes();
    final int TRIALS = 5;
    for (int i = 0; i < TRIALS; i++) {
      int negThreshold = (int) (-1 * 100 * Math.random());
      runBalancerAndVerify(testnodes, negThreshold);
    }
  }
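  /*
   * Hedged sketch for the concurrent-write scenarios above (Balancer_12 and
   * Balancer_14): a background writer that keeps generating load while the
   * balancer runs, reusing generateFileSystemLoad(). Illustrative only; the
   * real tests would also need an "eraser" counterpart and coordinated
   * shutdown once the balancer finishes.
   */
  private Thread startBackgroundWriter(final long blocksPerPass) {
    Thread writer = new Thread(new Runnable() {
      public void run() {
        while (!Thread.currentThread().isInterrupted()) {
          generateFileSystemLoad(blocksPerPass);
        }
      }
    }, "balancer-test-writer");
    writer.setDaemon(true);
    writer.start();
    return writer;
  }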
  /**
   * Balancer_17
   * Start balancer with out-of-range threshold value
   * (e.g. -123, 0, -324, 100000, -12222222, 1000000000, -10000, 345, 989)
   */
  @Test
  public void testBalancerConfiguredWithThresholdValueOutOfRange()
      throws IOException {
    List<DNClient> testnodes = getAllDatanodes();
    final int[] THRESHOLD_OUT_OF_RANGE_DATA = {
        -123, 0, -324, 100000, -12222222, 1000000000, -10000, 345, 989
    };
    for (int threshold : THRESHOLD_OUT_OF_RANGE_DATA) {
      runBalancerAndVerify(testnodes, threshold);
    }
  }

  /**
   * Balancer_18
   * Start balancer with alpha-numeric threshold value
   * (e.g., 103dsf, asd234, asfd, ASD, #$asd, 2345&, $35, %34)
   */
  @Test
  public void testBalancerConfiguredWithThresholdValueAlphanumeric()
      throws IOException {
    List<DNClient> testnodes = getAllDatanodes();
    final String[] THRESHOLD_ALPHA_DATA = {
        "103dsf", "asd234", "asfd", "ASD", "#$asd", "2345&", "$35", "%34",
        "0x64", "0xde", "0xad", "0xbe", "0xef"
    };
    for (String threshold : THRESHOLD_ALPHA_DATA) {
      runBalancerAndVerify(testnodes, threshold);
    }
  }

  /**
   * Balancer_19
   * Start 2 instances of balancer on the same gateway
   */
  @Test
  public void testBalancerRunTwoConcurrentInstancesOnSingleGateway()
      throws IOException {
    // do on gateway logic with small balancer heap
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_20
   * Start 2 instances of balancer on two different gateways
   */
  @Test
  public void testBalancerRunTwoConcurrentInstancesOnDistinctGateways()
      throws IOException {
    // do on gateway logic with small balancer heap
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Balancer_21
   * Start balancer when the cluster is already balanced
   */
  @Test
  public void testBalancerOnBalancedCluster() throws IOException {
    // run balancer twice
    testBalancerSimple();
    testBalancerSimple();
  }

  /**
   * Balancer_22
   * Running the balancer with half the data nodes not running
   */
  @Test
  public void testBalancerWithOnlyHalfOfDataNodesRunning() throws IOException {
    List<DNClient> datanodes = getAllDatanodes();
    int testnodeCount = (int) Math.floor(datanodes.size() * 0.5);
    List<DNClient> testnodes = reserveDatanodesForTest(testnodeCount);
    runBalancerAndVerify(testnodes);
  }

  /**
   * Balancer_23
   * Running the balancer and simultaneously simulating load on the
   * cluster with half the data nodes not running.
   */
  @Test
  public void testBalancerOnBusyClusterWithOnlyHalfOfDatanodesRunning()
      throws IOException {
    // load thread
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * Protocol Test Prelude
   *
   * First set up a 3-node cluster with nodes NA, NB and NC, which are on
   * different racks. Then create a file with one block B with a replication
   * factor 3. Finally add a new node ND to the cluster on the same rack as NC.
   */
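  /*
   * Hedged sketch of the prelude described above, using only helpers already
   * in this class: reserve three datanodes (note the current reservation
   * helper only picks two; see the TODO there) and write one block with
   * replication factor 3. Rack placement and the extra node ND still need
   * framework support and are not handled here.
   */
  private List<DNClient> setUpProtocolTestPrelude() throws IOException {
    final short replication = 3;
    List<DNClient> testnodes = reserveDatanodesForTest(3);
    shutdownNonTestNodes(testnodes);
    generateFileSystemLoad(1, replication); // one block B, replicated to NA, NB, NC
    return testnodes;
  }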
  /**
   * ProtocolTest_01
   * Copy block B from ND to NA with del hint NC
   */
  @Test
  public void testBlockReplacementProtocolFailWhenCopyBlockSourceDoesNotHaveBlockToCopy()
      throws IOException {
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * ProtocolTest_02
   * Copy block B from NA to NB with del hint NB
   */
  @Test
  public void testBlockReplacementProtocolFailWhenCopyBlockDestinationContainsBlockCopy()
      throws IOException {
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * ProtocolTest_03
   * Copy block B from NA to ND with del hint NB
   */
  @Test
  public void testBlockReplacementProtocolCopyBlock() throws IOException {
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * ProtocolTest_04
   * Copy block B from NB to NC with del hint NA
   */
  @Test
  public void testBlockReplacementProtocolWithInvalidHint() throws IOException {
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * ThrottleTest_01
   * Create a throttler with 1MB/s bandwidth. Send 6MB data, and throttle
   * at 0.5MB, 0.75MB, and in the end [1MB/s?].
   */

  /**
   * NamenodeProtocolTest_01
   * Get blocks from datanode 0 with a size of 2 blocks.
   */
  @Test
  public void testNamenodeProtocolGetBlocksCheckThroughput() throws IOException {
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * NamenodeProtocolTest_02
   * Get blocks from datanode 0 with a size of 1 block.
   */
  @Test
  public void testNamenodeProtocolGetSingleBlock() throws IOException {
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * NamenodeProtocolTest_03
   * Get blocks from datanode 0 with a size of 0.
   */
  @Test
  public void testNamenodeProtocolGetZeroBlocks() throws IOException {
    throw new UnsupportedOperationException("not implemented yet!");
  }

  /**
   * NamenodeProtocolTest_04
   * Get blocks from datanode 0 with a size of -1.
   */
  @Test
  public void testNamenodeProtocolGetMinusOneBlocks() throws Exception {
  }

  /**
   * NamenodeProtocolTest_05
   * Get blocks from a non-existent datanode.
   */
  @Test
  public void testNamenodeProtocolGetBlocksFromNonexistentDatanode()
      throws IOException {
    final short replication = 1;
    Path balancerTempDir = null;
    try {
      // reserve 2 nodes for test
      List<DNClient> testnodes = reserveDatanodesForTest(2);
      shutdownNonTestNodes(testnodes);
      DNClient testnode1 = testnodes.get(0);
      DNClient testnode2 = testnodes.get(1);
      // write some blocks with replication factor of 1
      balancerTempDir = makeTempDir();
      generateFileSystemLoad(20, replication);
      // get block locations from NN
      NNClient namenode = dfsCluster.getNNClient();
      // TODO extend namenode to get block locations
      //namenode.get
      // shutdown 1 node
      stopDatanode(testnode1);
      // attempt to retrieve blocks from the dead node
      // we should fail
    } finally {
      // cleanup
      // finally block to run cleanup
      LOG.info("clean off test data from DFS [rmr ~/balancer-temp]");
      try {
        deleteTempDir(balancerTempDir);
      } catch (Exception e) {
        LOG.warn("problem cleaning up temp dir", e);
      }
    }
  }
}