/*
 * Copyright (C) 2015 hops.io.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Test;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.TimeoutException;

/**
 * Exercises leader fail-over and fail-back between two namenodes running in a
 * {@link MiniDFSCluster}.
 *
 * <p>NOTE(review): the class extends {@code junit.framework.TestCase} while
 * also carrying JUnit 4 annotations; depending on the runner the
 * {@code @Test(timeout = ...)} value may not be enforced - confirm.
 */
public class TestHABasicFailover extends junit.framework.TestCase {

  public static final Log LOG = LogFactory.getLog(TestHABasicFailover.class);

  // Raise the verbosity of the loggers that matter when diagnosing election
  // problems. This is an instance initializer, so it runs per test instance.
  {
    ((Log4JLogger) NameNode.stateChangeLog).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) LeaseManager.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) LogFactory.getLog(FSNamesystem.class)).getLogger()
        .setLevel(Level.ALL);
  }

  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  int NUM_NAMENODES = 2;
  int NUM_DATANODES = 1;

  /**
   * Shuts the mini cluster down if one is still running.
   *
   * <p>Safe to call when the cluster was never created (for example when
   * {@code build()} failed) or was already shut down by the test itself.
   */
  @Override
  @After
  public void tearDown() throws Exception {
    shutdownCluster();
  }

  /**
   * Testing basic failover. After starting namenodes NN1, NN2, the namenode
   * that first initializes itself would be elected the leader. We allow NN1
   * to be the leader. We kill NN1. Failover will start and NN2 will detect
   * failure of NN1 and hence would elect itself as the leader. Also perform
   * fail-back to NN1 by killing NN2.
   *
   * @throws IOException if the mini cluster cannot be started or restarted
   * @throws TimeoutException if an expected leader is not elected, or is not
   *     recognized by the datanodes, within the computed timeout
   */
  @Test(timeout = 900000)
  public void testFailover() throws IOException, TimeoutException {
    final int NN1 = 0, NN2 = 1;
    if (NUM_NAMENODES < 2) {
      NUM_NAMENODES = 2;
    }

    try {
      // Create cluster with 2 namenodes
      cluster = new MiniDFSCluster.Builder(conf)
          .nnTopology(MiniDFSNNTopology.simpleHOPSTopology(NUM_NAMENODES))
          .numDataNodes(NUM_DATANODES).build();
      cluster.waitActive();

      // Give it time for leader to be elected: one leader-check interval plus
      // one heartbeat interval (the latter multiplied by 1000, presumably to
      // convert seconds to milliseconds).
      long timeout =
          conf.getInt(DFSConfigKeys.DFS_LEADER_CHECK_INTERVAL_IN_MS_KEY,
              DFSConfigKeys.DFS_LEADER_CHECK_INTERVAL_IN_MS_DEFAULT) +
              conf.getLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY,
                  DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT) * 1000L;
      // Every wait below allows ten such periods.
      final long electionTimeout = timeout * 10;

      /*
       * *********************************
       * testing fail over from NN1 to NN2
       * *********************************
       */
      // Check NN1 is the leader
      LOG.info(
          "NameNode 1 id " + cluster.getNameNode(NN1).getId() + " address " +
              cluster.getNameNode(NN1).getServiceRpcAddress().toString());
      LOG.info(
          "NameNode 2 id " + cluster.getNameNode(NN2).getId() + " address " +
              cluster.getNameNode(NN2).getServiceRpcAddress().toString());
      assertTrue("NN1 is expected to be leader, but is not",
          cluster.getNameNode(NN1).isLeader());

      // performing failover - Kill NN1. This would allow NN2 to be leader
      cluster.shutdownNameNode(NN1);

      // wait for leader to be elected and for Datanodes to also detect the
      // leader, then check NN2 is the leader and failover is detected
      awaitAndAssertLeader(NN2, "NN2", electionTimeout);

      LOG.debug("TestNN going to restart the NN2");

      // restart the newly elected leader and see if it is still the leader
      cluster.restartNameNode(NN2);
      cluster.waitActive();
      awaitAndAssertLeader(NN2, "NN2", electionTimeout);

      /*
       * **************************************
       * testing fail-back
       *
       * After some interval a datanode asks a namenode to return all alive
       * namenodes in the system.
       *
       * The datanode starts new threads for new namenodes. If it finds out
       * that some previous namenode is dead then the corresponding service
       * thread is killed.
       *
       * Datanodes find out about new namenodes by asking existing namenodes
       * in the system. What happens if a datanode is connected to a set X of
       * namenodes and they all die suddenly, and after a while a set Y of
       * namenodes comes online? The datanode will have no way of finding out
       * the namenodes belonging to set Y.
       *
       * There is no fix for it yet. If such a thing happens then restart the
       * datanode with some correct namenode.
       *
       * In the tests such scenarios are avoided by making sure that
       * datanodes are connected to at least one namenode after killing
       * other namenodes.
       * **************************************
       */
      // Doing a fail back scenario to NN1.
      // NN1 will be restarted in the system with the next highest id while
      // NN2 is still the leader.
      cluster.restartNameNode(NN1);
      cluster.waitActive();
      waitLeaderElection(cluster.getDataNodes(), cluster.getNameNode(NN2),
          electionTimeout);

      cluster.shutdownNameNode(NN2);
      cluster.waitActive();

      // waiting for NN1 to elect itself as the leader
      awaitAndAssertLeader(NN1, "NN1", electionTimeout);
    } finally {
      shutdownCluster();
    }
  }

  /**
   * Waits for the given namenode to become leader and be recognized by all
   * datanodes, then asserts both conditions.
   *
   * @param nnIndex index of the namenode inside the mini cluster
   * @param label human readable name used in the assertion message
   * @param timeout maximum time in milliseconds for each wait phase
   */
  private void awaitAndAssertLeader(int nnIndex, String label, long timeout)
      throws TimeoutException {
    NameNode expectedLeader = cluster.getNameNode(nnIndex);
    waitLeaderElection(cluster.getDataNodes(), expectedLeader, timeout);
    assertTrue(label + " is expected to be the leader, but is not",
        expectedLeader.isLeader());
    assertTrue("Not all datanodes detected the new leader",
        doesDataNodesRecognizeLeader(cluster.getDataNodes(), expectedLeader));
  }

  /**
   * Shuts down the cluster at most once; the field is cleared so that a later
   * call (for example from {@link #tearDown()}) is a no-op.
   */
  private void shutdownCluster() {
    if (cluster != null) {
      cluster.shutdown();
      cluster = null;
    }
  }

  /**
   * Tells whether every datanode reports being connected to the given
   * namenode.
   *
   * @param datanodes datanodes to query
   * @param namenode namenode whose address is looked up on each datanode
   * @return {@code true} if all datanodes are connected (also for an empty
   *     list), {@code false} as soon as one is not
   */
  public static boolean doesDataNodesRecognizeLeader(List<DataNode> datanodes,
      NameNode namenode) {
    for (DataNode datanode : datanodes) {
      if (!datanode.isConnectedToNN(namenode.getNameNodeAddress())) {
        return false;
      }
    }
    return true;
  }

  /**
   * Blocks until {@code nn} reports itself as leader and afterwards until all
   * datanodes are connected to it.
   *
   * <p>The two phases are timed independently, each against {@code timeout}.
   *
   * @param datanodes datanodes that must recognize the leader
   * @param nn namenode expected to become leader
   * @param timeout maximum time in milliseconds for each phase
   * @throws TimeoutException if a phase exceeds {@code timeout}, or if the
   *     waiting thread is interrupted (the interrupt flag is restored and the
   *     {@link InterruptedException} is attached as the cause)
   */
  public static void waitLeaderElection(List<DataNode> datanodes, NameNode nn,
      long timeout) throws TimeoutException {
    // wait for the new leader to be elected
    long initTime = System.currentTimeMillis();
    while (!nn.isLeader()) {
      sleepOrAbort(500);
      // check for time out
      if (System.currentTimeMillis() - initTime >= timeout) {
        throw new TimeoutException(
            "Namenode was not elected leader. Time out " + timeout);
      }
    }

    // wait for all datanodes to recognize the new leader
    initTime = System.currentTimeMillis();
    while (true) {
      // The pause deliberately comes before the first check, as in the
      // original implementation, so callers always wait at least once.
      sleepOrAbort(2000); // 2sec
      if (doesDataNodesRecognizeLeader(datanodes, nn)) {
        break;
      }
      // check for time out
      if (System.currentTimeMillis() - initTime >= timeout) {
        throw new TimeoutException(
            "Datanodes weren't able to detect newly elected leader");
      }
    }
  }

  /**
   * Sleeps for the given time; an interrupt ends the wait instead of being
   * swallowed.
   *
   * @param millis time to sleep in milliseconds
   * @throws TimeoutException if the thread is interrupted while sleeping
   */
  private static void sleepOrAbort(long millis) throws TimeoutException {
    try {
      Thread.sleep(millis);
    } catch (InterruptedException ex) {
      // Restore the flag so code further up the stack can see the interrupt.
      Thread.currentThread().interrupt();
      TimeoutException aborted = new TimeoutException(
          "Interrupted while waiting for leader election");
      aborted.initCause(ex);
      throw aborted;
    }
  }
}