/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import java.io.File;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
import org.apache.hadoop.hdfs.protocol.FSConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNode;

import static org.junit.Assert.*;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * This class tests the decommissioning of nodes.
 */
public class TestDecommission {
  public static final Log LOG = LogFactory.getLog(TestDecommission.class);
  static final long seed = 0xDEADBEEFL;
  static final int blockSize = 8192;
  static final int fileSize = 16384;
  static final int HEARTBEAT_INTERVAL = 1; // heartbeat interval in seconds

  Random myrand = new Random();
  Path hostsFile;
  Path excludeFile;
  FileSystem localFileSys;
  Configuration conf;
  MiniDFSCluster cluster = null;

  private void cleanFile(Path p) throws IOException {
    File f = new File(p.toUri().getPath());
    f.getParentFile().mkdirs();
    if (f.exists()) {
      f.delete();
    }
    f.createNewFile();
  }

  @Before
  public void setup() throws IOException {
    conf = new Configuration();
    // Set up the hosts/exclude files.
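    // In HDFS the exclude file drives decommissioning: after refreshNodes()
    // the namenode moves every datanode listed there to
    // DECOMMISSION_INPROGRESS and, once its blocks have been re-replicated
    // elsewhere, to DECOMMISSIONED. The hosts file (dfs.hosts), when set,
    // restricts which datanodes may register with the namenode at all.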
    localFileSys = FileSystem.getLocal(conf);
    Path workingDir = localFileSys.getWorkingDirectory();
    Path dir = new Path(workingDir, "build/test/data/work-dir/decommission/");
    hostsFile = new Path(dir, "hosts");
    excludeFile = new Path(dir, "exclude");
    cleanFile(hostsFile);
    cleanFile(excludeFile);

    // Set up the configuration
    conf.setBoolean("dfs.replication.considerLoad", false);
    conf.set("dfs.hosts.exclude", excludeFile.toUri().getPath());
    conf.setInt("heartbeat.recheck.interval", 2000);
    conf.setInt("dfs.heartbeat.interval", HEARTBEAT_INTERVAL);
    conf.setInt("dfs.replication.pending.timeout.sec", 4);
    writeConfigFile(excludeFile, null);
  }

  @After
  public void teardown() throws IOException {
    cleanupFile(localFileSys, excludeFile.getParent());
    if (cluster != null) {
      cluster.shutdown();
    }
  }

  private void writeConfigFile(Path name, ArrayList<String> nodes)
      throws IOException {
    // delete the file if it already exists
    if (localFileSys.exists(name)) {
      localFileSys.delete(name, true);
    }
    FSDataOutputStream stm = localFileSys.create(name);
    if (nodes != null) {
      for (String node : nodes) {
        stm.writeBytes(node);
        stm.writeBytes("\n");
      }
    }
    stm.close();
  }

  private void writeFile(FileSystem fileSys, Path name, int repl)
      throws IOException {
    // create and write a file that contains two blocks of data
    FSDataOutputStream stm = fileSys.create(name, true,
        fileSys.getConf().getInt("io.file.buffer.size", 4096),
        (short) repl, (long) blockSize);
    byte[] buffer = new byte[fileSize];
    Random rand = new Random(seed);
    rand.nextBytes(buffer);
    stm.write(buffer);
    stm.close();
    LOG.info("Created file " + name + " with " + repl + " replicas.");
  }

  /**
   * Verify that each block of the file has the expected number of replicas:
   * the requested replication factor plus one extra copy for every replica
   * that resides on the given decommissioned node. Also verify that all
   * decommissioned replicas are listed after the live ones.
   */
  private void checkFile(FileSystem fileSys, Path name, int repl,
      String downnode, int numDatanodes) throws IOException {
    // need a raw stream to enumerate the located blocks
    assertTrue("Not HDFS:" + fileSys.getUri(),
        fileSys instanceof DistributedFileSystem);
    DFSClient.DFSDataInputStream dis = (DFSClient.DFSDataInputStream)
        ((DistributedFileSystem) fileSys).open(name);
    Collection<LocatedBlock> dinfo = dis.getAllBlocks();
    for (LocatedBlock blk : dinfo) { // for each block
      int hasdown = 0;
      int firstDecomNodeIndex = -1;
      DatanodeInfo[] nodes = blk.getLocations();
      for (int j = 0; j < nodes.length; j++) { // for each replica
        if (nodes[j].getName().equals(downnode)) {
          hasdown++;
          LOG.info("Block " + blk.getBlock() + " replica "
              + nodes[j].getName() + " is decommissioned.");
        }
        if (nodes[j].isDecommissioned()) {
          if (firstDecomNodeIndex == -1) {
            firstDecomNodeIndex = j;
          }
          continue;
        }
        // a live replica must not appear after a decommissioned one
        assertEquals("Decom node is not at the end", -1, firstDecomNodeIndex);
      }
      LOG.info("Block " + blk.getBlock() + " has " + hasdown
          + " decommissioned replica(s).");
      assertEquals("Number of replicas for block " + blk.getBlock(),
          Math.min(numDatanodes, repl + hasdown), nodes.length);
    }
  }

  private void cleanupFile(FileSystem fileSys, Path name) throws IOException {
    assertTrue(fileSys.exists(name));
    fileSys.delete(name, true);
    assertTrue(!fileSys.exists(name));
  }
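  // For context: the decommission helper below does programmatically what an
  // administrator would do by hand (the path here is illustrative):
  //
  //   echo <datanode-name:port> >> /path/to/exclude-file
  //   hadoop dfsadmin -refreshNodes
  //
  // i.e. it appends the node to the exclude file and asks the namenode to
  // re-read its host lists.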
  /*
   * Decommission one randomly chosen live node and wait for it to reach the
   * given {@code waitForState}.
   */
  private DatanodeInfo decommissionNode(int nnIndex,
      ArrayList<DatanodeInfo> decommissionedNodes,
      AdminStates waitForState) throws IOException {
    DFSClient client = getDfsClient(cluster.getNameNode(nnIndex), conf);
    DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);

    // Pick a datanode at random, skipping ones already decommissioned.
    int index = 0;
    boolean found = false;
    while (!found) {
      index = myrand.nextInt(info.length);
      if (!info[index].isDecommissioned()) {
        found = true;
      }
    }
    String nodename = info[index].getName();
    LOG.info("Decommissioning node: " + nodename);

    // Write the name of the chosen node into the exclude file.
    ArrayList<String> nodes = new ArrayList<String>();
    if (decommissionedNodes != null) {
      for (DatanodeInfo dn : decommissionedNodes) {
        nodes.add(dn.getName());
      }
    }
    nodes.add(nodename);
    writeConfigFile(excludeFile, nodes);
    cluster.getNameNode(nnIndex).namesystem.refreshNodes(conf);
    DatanodeInfo ret =
        cluster.getNameNode(nnIndex).namesystem.getDatanode(info[index]);
    waitNodeState(ret, waitForState);
    return ret;
  }

  /*
   * Wait until the node reaches the given admin state.
   */
  private void waitNodeState(DatanodeInfo node, AdminStates state)
      throws IOException {
    boolean done = state == node.getAdminState();
    while (!done) {
      LOG.info("Waiting for node " + node + " to change state to "
          + state + "; current state: " + node.getAdminState());
      try {
        Thread.sleep(HEARTBEAT_INTERVAL * 1000);
      } catch (InterruptedException e) {
        // ignore and re-check the state
      }
      done = state == node.getAdminState();
    }
    LOG.info("node " + node + " reached the state " + state);
  }

  /* Get a DFSClient connected to the given namenode */
  private static DFSClient getDfsClient(NameNode nn, Configuration conf)
      throws IOException {
    return new DFSClient(nn.getNameNodeAddress(), conf);
  }

  /* Validate that the cluster has the expected number of live datanodes */
  private static void validateCluster(DFSClient client, int numDNs)
      throws IOException {
    DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
    assertEquals("Number of datanodes", numDNs, info.length);
  }

  /** Start a MiniDFSCluster
   * @throws IOException */
  private void startCluster(int numNameNodes, int numDatanodes,
      Configuration conf, boolean federation) throws IOException {
    if (federation) {
      cluster = new MiniDFSCluster(conf, numDatanodes, true, null,
          numNameNodes);
    } else {
      cluster = new MiniDFSCluster(conf, numDatanodes, true, null);
    }
    cluster.waitActive();
    for (int i = 0; i < numNameNodes; i++) {
      DFSClient client = getDfsClient(cluster.getNameNode(i), conf);
      validateCluster(client, numDatanodes);
    }
  }

  private void verifyStats(NameNode namenode, FSNamesystem fsn,
      DatanodeInfo node, boolean decommissioning)
      throws InterruptedException, IOException {
    // Do the stats check over 10 heartbeats
    for (int i = 0; i < 10; i++) {
      long[] newStats = namenode.getStats();
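      // Assumed layout of the stats array in this version (as implied by the
      // asserts below): index 0 is total capacity, index 1 is DFS used,
      // index 2 is remaining capacity.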
      // For a decommissioning node, the capacity of the DN is no longer
      // counted; only the space the DN actually uses counts toward
      // cluster capacity
      assertEquals(newStats[0],
          decommissioning ? node.getDfsUsed() : node.getCapacity());

      // Used capacity is counted for both normal and decommissioning nodes
      assertEquals(newStats[1], node.getDfsUsed());

      // For a decommissioning node, remaining space is not counted
      assertEquals(newStats[2], decommissioning ? 0 : node.getRemaining());

      // Ensure the transceiver count matches that of the DN
      assertEquals(fsn.getTotalLoad(), node.getXceiverCount());

      Thread.sleep(HEARTBEAT_INTERVAL * 1000); // sleep one heartbeat interval
    }
  }

  /**
   * Tests decommission for a non-federated cluster
   */
  @Test
  public void testDecommission() throws IOException {
    testDecommission(1, 6, false);
  }

  /**
   * Tests decommission for a federated cluster
   */
  @Test
  public void testDecommissionFederation() throws IOException {
    testDecommission(2, 2, true);
  }

  private void testDecommission(int numNamenodes, int numDatanodes,
      boolean federation) throws IOException {
    LOG.info("Starting test testDecommission");
    startCluster(numNamenodes, numDatanodes, conf, federation);

    ArrayList<ArrayList<DatanodeInfo>> namenodeDecomList =
        new ArrayList<ArrayList<DatanodeInfo>>(numNamenodes);
    for (int i = 0; i < numNamenodes; i++) {
      namenodeDecomList.add(i, new ArrayList<DatanodeInfo>(numDatanodes));
    }
    Path file1 = new Path("testDecommission.dat");
    for (int iteration = 0; iteration < numDatanodes - 1; iteration++) {
      int replicas = numDatanodes - iteration - 1;

      // Start decommissioning one namenode at a time
      for (int i = 0; i < numNamenodes; i++) {
        ArrayList<DatanodeInfo> decommissionedNodes = namenodeDecomList.get(i);
        FileSystem fileSys = cluster.getFileSystem(i);
        writeFile(fileSys, file1, replicas);

        // Decommission one node. Verify that the node is decommissioned.
        DatanodeInfo decomNode = decommissionNode(i, decommissionedNodes,
            AdminStates.DECOMMISSIONED);
        decommissionedNodes.add(decomNode);

        // Ensure a decommissioned datanode is not automatically shut down
        DFSClient client = getDfsClient(cluster.getNameNode(i), conf);
        assertEquals("All datanodes must be alive", numDatanodes,
            client.datanodeReport(DatanodeReportType.LIVE).length);
        checkFile(fileSys, file1, replicas, decomNode.getName(), numDatanodes);
        cleanupFile(fileSys, file1);
      }
    }

    // Restart the cluster and ensure decommissioned datanodes
    // are allowed to register with the namenode
    cluster.shutdown();
    startCluster(numNamenodes, numDatanodes, conf, federation);
  }

  /**
   * Tests cluster storage statistics during decommissioning for a
   * non-federated cluster
   */
  @Test
  public void testClusterStats() throws Exception {
    testClusterStats(1, false);
  }

  /**
   * Tests cluster storage statistics during decommissioning for a
   * federated cluster
   */
  @Test
  public void testClusterStatsFederation() throws Exception {
    testClusterStats(3, true);
  }

  public void testClusterStats(int numNameNodes, boolean federation)
      throws IOException, InterruptedException {
    LOG.info("Starting test testClusterStats");
    int numDatanodes = 1;
    startCluster(numNameNodes, numDatanodes, conf, federation);

    for (int i = 0; i < numNameNodes; i++) {
      FileSystem fileSys = cluster.getFileSystem(i);
      Path file = new Path("testClusterStats.dat");
      writeFile(fileSys, file, 1);

      NameNode namenode = cluster.getNameNode(i);
      FSNamesystem fsn = namenode.namesystem;
      DatanodeInfo downnode = decommissionNode(i, null,
          AdminStates.DECOMMISSION_INPROGRESS);
      // Check namenode stats for multiple datanode heartbeats
      verifyStats(namenode, fsn, downnode, true);

      // Stop decommissioning and verify stats
      writeConfigFile(excludeFile, null);
      fsn.refreshNodes(conf);
      DatanodeInfo ret = fsn.getDatanode(downnode);
      waitNodeState(ret, AdminStates.NORMAL);
      verifyStats(namenode, fsn, ret, false);
    }
  }
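  // Note: when dfs.hosts points at a hosts file, only datanodes named in it
  // may register with the namenode; a registered datanode that drops out of
  // the file is refused on the next refreshNodes() and is eventually
  // reported dead, which is the behavior the tests below exercise.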
  /**
   * Tests host/include file functionality. Only datanodes
   * in the include file are allowed to connect to the namenode in a
   * non-federated cluster.
   */
  @Test
  public void testHostsFile() throws IOException, InterruptedException {
    // Test for a single namenode cluster
    testHostsFile(1, false);
  }

  /**
   * Tests host/include file functionality. Only datanodes
   * in the include file are allowed to connect to the namenode in a
   * federated cluster.
   */
  @Test
  public void testHostsFileFederation()
      throws IOException, InterruptedException {
    // Test for a 3-namenode federated cluster
    testHostsFile(3, true);
  }

  public void testHostsFile(int numNameNodes, boolean federation)
      throws IOException, InterruptedException {
    conf.set(FSConstants.DFS_HOSTS, hostsFile.toUri().getPath());
    int numDatanodes = 1;
    cluster = new MiniDFSCluster(0, conf, numDatanodes, true, true, true,
        null, null, null, null, true, true, numNameNodes, federation);
    cluster.waitActive();

    // Overwrite the hosts file with a name that matches no datanode and
    // ensure the datanode is disallowed from talking to the namenode,
    // resulting in its shutdown.
    ArrayList<String> list = new ArrayList<String>();
    list.add("invalidhost");
    writeConfigFile(hostsFile, list);

    for (int j = 0; j < numNameNodes; j++) {
      cluster.getNameNode(j).namesystem.refreshNodes(conf);

      DFSClient client = getDfsClient(cluster.getNameNode(j), conf);
      DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
      for (int i = 0; i < 20 && info.length != 0; i++) {
        LOG.info("Waiting for datanode to be marked dead");
        Thread.sleep(HEARTBEAT_INTERVAL * 1000);
        info = client.datanodeReport(DatanodeReportType.LIVE);
      }
      assertEquals("Number of live nodes should be 0", 0, info.length);
    }
  }
}