/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.DataOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.net.URI; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.ha.HAServiceProtocol.RequestSource; import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; import org.apache.log4j.Level; import org.junit.Assert; import org.junit.Test; import org.mockito.Mockito; /** * Tests state transition from active->standby, and manual failover * and failback between two namenodes. */ public class TestHAStateTransitions { protected static final Log LOG = LogFactory.getLog( TestStandbyIsHot.class); private static final Path TEST_DIR = new Path("/test"); private static final Path TEST_FILE_PATH = new Path(TEST_DIR, "foo"); private static final String TEST_FILE_STR = TEST_FILE_PATH.toUri().getPath(); private static final String TEST_FILE_DATA = "Hello state transitioning world"; private static final StateChangeRequestInfo REQ_INFO = new StateChangeRequestInfo( RequestSource.REQUEST_BY_USER_FORCED); static { ((Log4JLogger)EditLogTailer.LOG).getLogger().setLevel(Level.ALL); } /** * Test which takes a single node and flip flops between * active and standby mode, making sure it doesn't * double-play any edits. */ @Test public void testTransitionActiveToStandby() throws Exception { Configuration conf = new Configuration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(1) .build(); try { cluster.waitActive(); cluster.transitionToActive(0); FileSystem fs = cluster.getFileSystem(0); fs.mkdirs(TEST_DIR); cluster.transitionToStandby(0); try { fs.mkdirs(new Path("/x")); fail("Didn't throw trying to mutate FS in standby state"); } catch (Throwable t) { GenericTestUtils.assertExceptionContains( "Operation category WRITE is not supported", t); } cluster.transitionToActive(0); // Create a file, then delete the whole directory recursively. DFSTestUtil.createFile(fs, new Path(TEST_DIR, "foo"), 10, (short)1, 1L); fs.delete(TEST_DIR, true); // Now if the standby tries to replay the last segment that it just // wrote as active, it would fail since it's trying to create a file // in a non-existent directory. cluster.transitionToStandby(0); cluster.transitionToActive(0); assertFalse(fs.exists(TEST_DIR)); } finally { cluster.shutdown(); } } /** * Test that transitioning a service to the state that it is already * in is a nop, specifically, an exception is not thrown. */ @Test public void testTransitionToCurrentStateIsANop() throws Exception { Configuration conf = new Configuration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(1) .build(); try { cluster.waitActive(); cluster.transitionToActive(0); cluster.transitionToActive(0); cluster.transitionToStandby(0); cluster.transitionToStandby(0); } finally { cluster.shutdown(); } } /** * Test manual failover failback for one namespace * @param cluster single process test cluster * @param conf cluster configuration * @param nsIndex namespace index starting from zero * @throws Exception */ private void testManualFailoverFailback(MiniDFSCluster cluster, Configuration conf, int nsIndex) throws Exception { int nn0 = 2 * nsIndex, nn1 = 2 * nsIndex + 1; cluster.transitionToActive(nn0); LOG.info("Starting with NN 0 active in namespace " + nsIndex); FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); fs.mkdirs(TEST_DIR); LOG.info("Failing over to NN 1 in namespace " + nsIndex); cluster.transitionToStandby(nn0); cluster.transitionToActive(nn1); assertTrue(fs.exists(TEST_DIR)); DFSTestUtil.writeFile(fs, TEST_FILE_PATH, TEST_FILE_DATA); LOG.info("Failing over to NN 0 in namespace " + nsIndex); cluster.transitionToStandby(nn1); cluster.transitionToActive(nn0); assertTrue(fs.exists(TEST_DIR)); assertEquals(TEST_FILE_DATA, DFSTestUtil.readFile(fs, TEST_FILE_PATH)); LOG.info("Removing test file"); fs.delete(TEST_DIR, true); assertFalse(fs.exists(TEST_DIR)); LOG.info("Failing over to NN 1 in namespace " + nsIndex); cluster.transitionToStandby(nn0); cluster.transitionToActive(nn1); assertFalse(fs.exists(TEST_DIR)); } /** * Tests manual failover back and forth between two NameNodes. */ @Test public void testManualFailoverAndFailback() throws Exception { Configuration conf = new Configuration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(1) .build(); try { cluster.waitActive(); // test the only namespace testManualFailoverFailback(cluster, conf, 0); } finally { cluster.shutdown(); } } /** * Regression test for HDFS-2693: when doing state transitions, we need to * lock the FSNamesystem so that we don't end up doing any writes while it's * "in between" states. * This test case starts up several client threads which do mutation operations * while flipping a NN back and forth from active to standby. */ @Test(timeout=120000) public void testTransitionSynchronization() throws Exception { Configuration conf = new Configuration(); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(0) .build(); try { cluster.waitActive(); ReentrantReadWriteLock spyLock = NameNodeAdapter.spyOnFsLock( cluster.getNameNode(0).getNamesystem()); Mockito.doAnswer(new GenericTestUtils.SleepAnswer(50)) .when(spyLock).writeLock(); final FileSystem fs = HATestUtil.configureFailoverFs( cluster, conf); TestContext ctx = new TestContext(); for (int i = 0; i < 50; i++) { final int finalI = i; ctx.addThread(new RepeatingTestThread(ctx) { @Override public void doAnAction() throws Exception { Path p = new Path("/test-" + finalI); fs.mkdirs(p); fs.delete(p, true); } }); } ctx.addThread(new RepeatingTestThread(ctx) { @Override public void doAnAction() throws Exception { cluster.transitionToStandby(0); Thread.sleep(50); cluster.transitionToActive(0); } }); ctx.startThreads(); ctx.waitFor(20000); ctx.stop(); } finally { cluster.shutdown(); } } /** * Test for HDFS-2812. Since lease renewals go from the client * only to the active NN, the SBN will have out-of-date lease * info when it becomes active. We need to make sure we don't * accidentally mark the leases as expired when the failover * proceeds. */ @Test(timeout=120000) public void testLeasesRenewedOnTransition() throws Exception { Configuration conf = new Configuration(); conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(1) .build(); FSDataOutputStream stm = null; FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); NameNode nn0 = cluster.getNameNode(0); NameNode nn1 = cluster.getNameNode(1); try { cluster.waitActive(); cluster.transitionToActive(0); LOG.info("Starting with NN 0 active"); stm = fs.create(TEST_FILE_PATH); long nn0t0 = NameNodeAdapter.getLeaseRenewalTime(nn0, TEST_FILE_STR); assertTrue(nn0t0 > 0); long nn1t0 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR); assertEquals("Lease should not yet exist on nn1", -1, nn1t0); Thread.sleep(5); // make sure time advances! HATestUtil.waitForStandbyToCatchUp(nn0, nn1); long nn1t1 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR); assertTrue("Lease should have been created on standby. Time was: " + nn1t1, nn1t1 > nn0t0); Thread.sleep(5); // make sure time advances! LOG.info("Failing over to NN 1"); cluster.transitionToStandby(0); cluster.transitionToActive(1); long nn1t2 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR); assertTrue("Lease should have been renewed by failover process", nn1t2 > nn1t1); } finally { IOUtils.closeStream(stm); cluster.shutdown(); } } /** * Test that delegation tokens continue to work after the failover. */ @Test public void testDelegationTokensAfterFailover() throws IOException { Configuration conf = new Configuration(); conf.setBoolean( DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(0) .build(); try { cluster.waitActive(); cluster.transitionToActive(0); NameNode nn1 = cluster.getNameNode(0); NameNode nn2 = cluster.getNameNode(1); String renewer = UserGroupInformation.getLoginUser().getUserName(); Token<DelegationTokenIdentifier> token = nn1.getRpcServer() .getDelegationToken(new Text(renewer)); LOG.info("Failing over to NN 1"); cluster.transitionToStandby(0); cluster.transitionToActive(1); nn2.getRpcServer().renewDelegationToken(token); nn2.getRpcServer().cancelDelegationToken(token); token = nn2.getRpcServer().getDelegationToken(new Text(renewer)); Assert.assertTrue(token != null); } finally { cluster.shutdown(); } } /** * Tests manual failover back and forth between two NameNodes * for federation cluster with two namespaces. */ @Test public void testManualFailoverFailbackFederationHA() throws Exception { Configuration conf = new Configuration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHAFederatedTopology(2)) .numDataNodes(1) .build(); try { cluster.waitActive(); // test for namespace 0 testManualFailoverFailback(cluster, conf, 0); // test for namespace 1 testManualFailoverFailback(cluster, conf, 1); } finally { cluster.shutdown(); } } @Test public void testFailoverWithEmptyInProgressEditLog() throws Exception { testFailoverAfterCrashDuringLogRoll(false); } @Test public void testFailoverWithEmptyInProgressEditLogWithHeader() throws Exception { testFailoverAfterCrashDuringLogRoll(true); } private static void testFailoverAfterCrashDuringLogRoll(boolean writeHeader) throws Exception { Configuration conf = new Configuration(); conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, Integer.MAX_VALUE); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(0) .build(); FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); try { cluster.transitionToActive(0); NameNode nn0 = cluster.getNameNode(0); nn0.getRpcServer().rollEditLog(); cluster.shutdownNameNode(0); createEmptyInProgressEditLog(cluster, nn0, writeHeader); cluster.transitionToActive(1); } finally { IOUtils.cleanup(LOG, fs); cluster.shutdown(); } } private static void createEmptyInProgressEditLog(MiniDFSCluster cluster, NameNode nn, boolean writeHeader) throws IOException { long txid = nn.getNamesystem().getEditLog().getLastWrittenTxId(); URI sharedEditsUri = cluster.getSharedEditsDir(0, 1); File sharedEditsDir = new File(sharedEditsUri.getPath()); StorageDirectory storageDir = new StorageDirectory(sharedEditsDir); File inProgressFile = NameNodeAdapter.getInProgressEditsFile(storageDir, txid + 1); assertTrue("Failed to create in-progress edits file", inProgressFile.createNewFile()); if (writeHeader) { DataOutputStream out = new DataOutputStream(new FileOutputStream( inProgressFile)); EditLogFileOutputStream.writeHeader(out); out.close(); } } /** * The secret manager needs to start/stop - the invariant should be that * the secret manager runs if and only if the NN is active and not in * safe mode. As a state diagram, we need to test all of the following * transitions to make sure the secret manager is started when we transition * into state 4, but none of the others. * <pre> * SafeMode Not SafeMode * Standby 1 <------> 2 * ^ ^ * | | * v v * Active 3 <------> 4 * </pre> */ @Test(timeout=60000) public void testSecretManagerState() throws Exception { Configuration conf = new Configuration(); conf.setBoolean( DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); conf.setInt( DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY, 50); conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(1) .waitSafeMode(false) .build(); try { cluster.transitionToActive(0); DFSTestUtil.createFile(cluster.getFileSystem(0), TEST_FILE_PATH, 6000, (short)1, 1L); cluster.getConfiguration(0).setInt( DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 60000); cluster.restartNameNode(0); NameNode nn = cluster.getNameNode(0); banner("Started in state 1."); assertTrue(nn.isStandbyState()); assertTrue(nn.isInSafeMode()); assertFalse(isDTRunning(nn)); banner("Transition 1->2. Should not start secret manager"); NameNodeAdapter.leaveSafeMode(nn); assertTrue(nn.isStandbyState()); assertFalse(nn.isInSafeMode()); assertFalse(isDTRunning(nn)); banner("Transition 2->1. Should not start secret manager."); NameNodeAdapter.enterSafeMode(nn, false); assertTrue(nn.isStandbyState()); assertTrue(nn.isInSafeMode()); assertFalse(isDTRunning(nn)); banner("Transition 1->3. Should not start secret manager."); nn.getRpcServer().transitionToActive(REQ_INFO); assertFalse(nn.isStandbyState()); assertTrue(nn.isInSafeMode()); assertFalse(isDTRunning(nn)); banner("Transition 3->1. Should not start secret manager."); nn.getRpcServer().transitionToStandby(REQ_INFO); assertTrue(nn.isStandbyState()); assertTrue(nn.isInSafeMode()); assertFalse(isDTRunning(nn)); banner("Transition 1->3->4. Should start secret manager."); nn.getRpcServer().transitionToActive(REQ_INFO); NameNodeAdapter.leaveSafeMode(nn); assertFalse(nn.isStandbyState()); assertFalse(nn.isInSafeMode()); assertTrue(isDTRunning(nn)); banner("Transition 4->3. Should stop secret manager"); NameNodeAdapter.enterSafeMode(nn, false); assertFalse(nn.isStandbyState()); assertTrue(nn.isInSafeMode()); assertFalse(isDTRunning(nn)); banner("Transition 3->4. Should start secret manager"); NameNodeAdapter.leaveSafeMode(nn); assertFalse(nn.isStandbyState()); assertFalse(nn.isInSafeMode()); assertTrue(isDTRunning(nn)); for (int i = 0; i < 20; i++) { // Loop the last check to suss out races. banner("Transition 4->2. Should stop secret manager."); nn.getRpcServer().transitionToStandby(REQ_INFO); assertTrue(nn.isStandbyState()); assertFalse(nn.isInSafeMode()); assertFalse(isDTRunning(nn)); banner("Transition 2->4. Should start secret manager"); nn.getRpcServer().transitionToActive(REQ_INFO); assertFalse(nn.isStandbyState()); assertFalse(nn.isInSafeMode()); assertTrue(isDTRunning(nn)); } } finally { cluster.shutdown(); } } private boolean isDTRunning(NameNode nn) { return NameNodeAdapter.getDtSecretManager(nn.getNamesystem()).isRunning(); } /** * Print a big banner in the test log to make debug easier. */ static void banner(String string) { LOG.info("\n\n\n\n================================================\n" + string + "\n" + "==================================================\n\n"); } }