/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.server.namenode; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.IOException; import java.io.PrintStream; import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Random; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager; import org.apache.hadoop.hdfs.server.blockmanagement.DecommissionManager; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; import org.apache.hadoop.hdfs.tools.DFSAdmin; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; /** * This class tests the decommissioning of nodes. */ public class TestDecommissioningStatus { private static final long seed = 0xDEADBEEFL; private static final int blockSize = 8192; private static final int fileSize = 16384; private static final int numDatanodes = 2; private static MiniDFSCluster cluster; private static FileSystem fileSys; private static Path excludeFile; private static FileSystem localFileSys; private static Configuration conf; private static Path dir; final ArrayList<String> decommissionedNodes = new ArrayList<String>(numDatanodes); @BeforeClass public static void setUp() throws Exception { conf = new HdfsConfiguration(); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, false); // Set up the hosts/exclude files. localFileSys = FileSystem.getLocal(conf); Path workingDir = localFileSys.getWorkingDirectory(); dir = new Path(workingDir, "build/test/data/work-dir/decommission"); assertTrue(localFileSys.mkdirs(dir)); excludeFile = new Path(dir, "exclude"); conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath()); Path includeFile = new Path(dir, "include"); conf.set(DFSConfigKeys.DFS_HOSTS, includeFile.toUri().getPath()); conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1000); conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, 4); conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000); conf.setInt(DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_INTERVAL_KEY, 1); conf.setLong(DFSConfigKeys.DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY, 1); writeConfigFile(localFileSys, excludeFile, null); writeConfigFile(localFileSys, includeFile, null); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes).build(); cluster.waitActive(); fileSys = cluster.getFileSystem(); cluster.getNamesystem().getBlockManager().getDatanodeManager() .setHeartbeatExpireInterval(3000); Logger.getLogger(DecommissionManager.class).setLevel(Level.DEBUG); } @AfterClass public static void tearDown() throws Exception { if (localFileSys != null ) cleanupFile(localFileSys, dir); if(fileSys != null) fileSys.close(); if(cluster != null) cluster.shutdown(); } private static void writeConfigFile(FileSystem fs, Path name, ArrayList<String> nodes) throws IOException { // delete if it already exists if (fs.exists(name)) { fs.delete(name, true); } FSDataOutputStream stm = fs.create(name); if (nodes != null) { for (Iterator<String> it = nodes.iterator(); it.hasNext();) { String node = it.next(); stm.writeBytes(node); stm.writeBytes("\n"); } } stm.close(); } private void writeFile(FileSystem fileSys, Path name, short repl) throws IOException { // create and write a file that contains three blocks of data FSDataOutputStream stm = fileSys.create(name, true, fileSys.getConf() .getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096), repl, blockSize); byte[] buffer = new byte[fileSize]; Random rand = new Random(seed); rand.nextBytes(buffer); stm.write(buffer); stm.close(); } private FSDataOutputStream writeIncompleteFile(FileSystem fileSys, Path name, short repl) throws IOException { // create and write a file that contains three blocks of data FSDataOutputStream stm = fileSys.create(name, true, fileSys.getConf() .getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096), repl, blockSize); byte[] buffer = new byte[fileSize]; Random rand = new Random(seed); rand.nextBytes(buffer); stm.write(buffer); // need to make sure that we actually write out both file blocks // (see FSOutputSummer#flush) stm.flush(); // Do not close stream, return it // so that it is not garbage collected return stm; } static private void cleanupFile(FileSystem fileSys, Path name) throws IOException { assertTrue(fileSys.exists(name)); fileSys.delete(name, true); assertTrue(!fileSys.exists(name)); } /* * Decommissions the node at the given index */ private String decommissionNode(FSNamesystem namesystem, DFSClient client, FileSystem localFileSys, int nodeIndex) throws IOException { DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE); String nodename = info[nodeIndex].getXferAddr(); decommissionNode(namesystem, localFileSys, nodename); return nodename; } /* * Decommissions the node by name */ private void decommissionNode(FSNamesystem namesystem, FileSystem localFileSys, String dnName) throws IOException { System.out.println("Decommissioning node: " + dnName); // write nodename into the exclude file. ArrayList<String> nodes = new ArrayList<String>(decommissionedNodes); nodes.add(dnName); writeConfigFile(localFileSys, excludeFile, nodes); } private void checkDecommissionStatus(DatanodeDescriptor decommNode, int expectedUnderRep, int expectedDecommissionOnly, int expectedUnderRepInOpenFiles) { assertEquals("Unexpected num under-replicated blocks", expectedUnderRep, decommNode.decommissioningStatus.getUnderReplicatedBlocks()); assertEquals("Unexpected number of decom-only replicas", expectedDecommissionOnly, decommNode.decommissioningStatus.getDecommissionOnlyReplicas()); assertEquals( "Unexpected number of replicas in under-replicated open files", expectedUnderRepInOpenFiles, decommNode.decommissioningStatus.getUnderReplicatedInOpenFiles()); } private void checkDFSAdminDecommissionStatus( List<DatanodeDescriptor> expectedDecomm, DistributedFileSystem dfs, DFSAdmin admin) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); PrintStream oldOut = System.out; System.setOut(ps); try { // Parse DFSAdmin just to check the count admin.report(new String[] {"-decommissioning"}, 0); String[] lines = baos.toString().split("\n"); Integer num = null; int count = 0; for (String line: lines) { if (line.startsWith("Decommissioning datanodes")) { // Pull out the "(num)" and parse it into an int String temp = line.split(" ")[2]; num = Integer.parseInt((String) temp.subSequence(1, temp.length() - 2)); } if (line.contains("Decommission in progress")) { count++; } } assertTrue("No decommissioning output", num != null); assertEquals("Unexpected number of decomming DNs", expectedDecomm.size(), num.intValue()); assertEquals("Unexpected number of decomming DNs", expectedDecomm.size(), count); // Check Java API for correct contents List<DatanodeInfo> decomming = new ArrayList<DatanodeInfo>(Arrays.asList(dfs .getDataNodeStats(DatanodeReportType.DECOMMISSIONING))); assertEquals("Unexpected number of decomming DNs", expectedDecomm.size(), decomming.size()); for (DatanodeID id : expectedDecomm) { assertTrue("Did not find expected decomming DN " + id, decomming.contains(id)); } } finally { System.setOut(oldOut); } } /** * Tests Decommissioning Status in DFS. */ @Test public void testDecommissionStatus() throws Exception { InetSocketAddress addr = new InetSocketAddress("localhost", cluster .getNameNodePort()); DFSClient client = new DFSClient(addr, conf); DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE); assertEquals("Number of Datanodes ", 2, info.length); DistributedFileSystem fileSys = cluster.getFileSystem(); DFSAdmin admin = new DFSAdmin(cluster.getConfiguration(0)); short replicas = numDatanodes; // // Decommission one node. Verify the decommission status // Path file1 = new Path("decommission.dat"); writeFile(fileSys, file1, replicas); Path file2 = new Path("decommission1.dat"); FSDataOutputStream st1 = writeIncompleteFile(fileSys, file2, replicas); for (DataNode d: cluster.getDataNodes()) { DataNodeTestUtils.triggerBlockReport(d); } FSNamesystem fsn = cluster.getNamesystem(); final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager(); for (int iteration = 0; iteration < numDatanodes; iteration++) { String downnode = decommissionNode(fsn, client, localFileSys, iteration); dm.refreshNodes(conf); decommissionedNodes.add(downnode); BlockManagerTestUtil.recheckDecommissionState(dm); final List<DatanodeDescriptor> decommissioningNodes = dm.getDecommissioningNodes(); if (iteration == 0) { assertEquals(decommissioningNodes.size(), 1); DatanodeDescriptor decommNode = decommissioningNodes.get(0); checkDecommissionStatus(decommNode, 3, 0, 1); checkDFSAdminDecommissionStatus(decommissioningNodes.subList(0, 1), fileSys, admin); } else { assertEquals(decommissioningNodes.size(), 2); DatanodeDescriptor decommNode1 = decommissioningNodes.get(0); DatanodeDescriptor decommNode2 = decommissioningNodes.get(1); // This one is still 3,3,1 since it passed over the UC block // earlier, before node 2 was decommed checkDecommissionStatus(decommNode1, 3, 3, 1); // This one is 4,4,2 since it has the full state checkDecommissionStatus(decommNode2, 4, 4, 2); checkDFSAdminDecommissionStatus(decommissioningNodes.subList(0, 2), fileSys, admin); } } // Call refreshNodes on FSNamesystem with empty exclude file. // This will remove the datanodes from decommissioning list and // make them available again. writeConfigFile(localFileSys, excludeFile, null); dm.refreshNodes(conf); st1.close(); cleanupFile(fileSys, file1); cleanupFile(fileSys, file2); } /** * Verify a DN remains in DECOMMISSION_INPROGRESS state if it is marked * as dead before decommission has completed. That will allow DN to resume * the replication process after it rejoins the cluster. */ @Test(timeout=120000) public void testDecommissionStatusAfterDNRestart() throws Exception { DistributedFileSystem fileSys = (DistributedFileSystem)cluster.getFileSystem(); // Create a file with one block. That block has one replica. Path f = new Path("decommission.dat"); DFSTestUtil.createFile(fileSys, f, fileSize, fileSize, fileSize, (short)1, seed); // Find the DN that owns the only replica. RemoteIterator<LocatedFileStatus> fileList = fileSys.listLocatedStatus(f); BlockLocation[] blockLocations = fileList.next().getBlockLocations(); String dnName = blockLocations[0].getNames()[0]; // Decommission the DN. FSNamesystem fsn = cluster.getNamesystem(); final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager(); decommissionNode(fsn, localFileSys, dnName); dm.refreshNodes(conf); // Stop the DN when decommission is in progress. // Given DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY is to 1 and the size of // the block, it will take much longer time that test timeout value for // the decommission to complete. So when stopDataNode is called, // decommission should be in progress. DataNodeProperties dataNodeProperties = cluster.stopDataNode(dnName); final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>(); while (true) { dm.fetchDatanodes(null, dead, false); if (dead.size() == 1) { break; } Thread.sleep(1000); } // Force removal of the dead node's blocks. BlockManagerTestUtil.checkHeartbeat(fsn.getBlockManager()); // Force DatanodeManager to check decommission state. BlockManagerTestUtil.recheckDecommissionState(dm); // Verify that the DN remains in DECOMMISSION_INPROGRESS state. assertTrue("the node should be DECOMMISSION_IN_PROGRESSS", dead.get(0).isDecommissionInProgress()); // Check DatanodeManager#getDecommissionNodes, make sure it returns // the node as decommissioning, even if it's dead List<DatanodeDescriptor> decomlist = dm.getDecommissioningNodes(); assertTrue("The node should be be decommissioning", decomlist.size() == 1); // Delete the under-replicated file, which should let the // DECOMMISSION_IN_PROGRESS node become DECOMMISSIONED cleanupFile(fileSys, f); BlockManagerTestUtil.recheckDecommissionState(dm); assertTrue("the node should be decommissioned", dead.get(0).isDecommissioned()); // Add the node back cluster.restartDataNode(dataNodeProperties, true); cluster.waitActive(); // Call refreshNodes on FSNamesystem with empty exclude file. // This will remove the datanodes from decommissioning list and // make them available again. writeConfigFile(localFileSys, excludeFile, null); dm.refreshNodes(conf); } /** * Verify the support for decommissioning a datanode that is already dead. * Under this scenario the datanode should immediately be marked as * DECOMMISSIONED */ @Test(timeout=120000) public void testDecommissionDeadDN() throws Exception { Logger log = Logger.getLogger(DecommissionManager.class); log.setLevel(Level.DEBUG); DatanodeID dnID = cluster.getDataNodes().get(0).getDatanodeId(); String dnName = dnID.getXferAddr(); DataNodeProperties stoppedDN = cluster.stopDataNode(0); DFSTestUtil.waitForDatanodeState(cluster, dnID.getDatanodeUuid(), false, 30000); FSNamesystem fsn = cluster.getNamesystem(); final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager(); DatanodeDescriptor dnDescriptor = dm.getDatanode(dnID); decommissionNode(fsn, localFileSys, dnName); dm.refreshNodes(conf); BlockManagerTestUtil.recheckDecommissionState(dm); assertTrue(dnDescriptor.isDecommissioned()); // Add the node back cluster.restartDataNode(stoppedDN, true); cluster.waitActive(); // Call refreshNodes on FSNamesystem with empty exclude file to remove the // datanode from decommissioning list and make it available again. writeConfigFile(localFileSys, excludeFile, null); dm.refreshNodes(conf); } }