/*
* Copyright (C) 2015 hops.io.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.AppendTestUtil;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Ignore;
import org.junit.Test;
import java.io.IOException;
/**
* Tests the failover during load (read/write operations) when:
* 1. Failover from Leader to namenode with next lowest Id
* 2. When any other non-leader namenode crashes
* 3. When datanodes crash
* <p/>
* In each test case, the following should be ensured:
* 1. The load should continue to run during failover
* 2. No corrupt blocks are detected during load
* 3. Failovers should perform successfully from the Leader namenode to the
* namenode with the next lowest id
*/
@Ignore(value = "The design of this test needs to be reconsidered. " +
    "It fails most of the times because of race conditions.")
// NOTE(review): this class mixes JUnit 3 (extends junit.framework.TestCase)
// with JUnit 4 annotations (@Test, @After, @Ignore). Depending on the runner,
// the JUnit 4 annotations — including @Test(timeout=...) — may be silently
// ignored. Consider migrating fully to JUnit 4.
public class TestHADataNodeFailure extends junit.framework.TestCase {
  public static final Log LOG = LogFactory.getLog(TestHADataNodeFailure.class);

  // Instance initializer: raise log levels to ALL for the subsystems under
  // test so failover activity is visible in the test output.
  {
    ((Log4JLogger) NameNode.stateChangeLog).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) LeaseManager.LOG).getLogger().setLevel(Level.ALL);
    ((Log4JLogger) LogFactory.getLog(FSNamesystem.class)).getLogger()
        .setLevel(Level.ALL);
  }

  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  // Indices of the first and second namenode in the mini cluster topology.
  int NN1 = 0, NN2 = 1;
  static int NUM_NAMENODES = 2;
  static int NUM_DATANODES = 1;
  // 10 seconds timeout default; recomputed from conf in setUp().
  long NNDeathTimeout = 10000;
  boolean writeInSameDir = false;
  boolean killNN = true;
  boolean waitFileisClosed = true;
  int fileCloseWaitTime = 5000;
  long waitReplicationTimeout = 5 * 60 * 1000;
  Path baseDir = new Path("/testsLoad");
  Writer[] writers = new Writer[25];

  /**
   * Starts a MiniDFSCluster with {@link #NUM_NAMENODES} namenodes and
   * {@link #NUM_DATANODES} datanodes, creates the base test directory, and
   * builds one {@code Writer} per slot in {@link #writers}.
   *
   * @param replicationFactor replication factor to configure on the cluster
   * @throws IOException if cluster start-up or namespace creation fails
   */
  private void setUp(int replicationFactor) throws IOException {
    // initialize the cluster with minimum 2 namenodes and minimum 6 datanodes
    if (NUM_NAMENODES < 2) {
      NUM_NAMENODES = 2;
    }
    this.conf = new Configuration();
    conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replicationFactor);
    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
        15);
    conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 15000);
    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHOPSTopology(NUM_NAMENODES))
        .format(true).numDataNodes(NUM_DATANODES).build();
    cluster.waitActive();
    fs = cluster.getNewFileSystemInstance(NN1);
    // Death timeout = leader-check interval * (missed-heartbeat threshold + 2).
    NNDeathTimeout =
        conf.getInt(DFSConfigKeys.DFS_LEADER_CHECK_INTERVAL_IN_MS_KEY,
            DFSConfigKeys.DFS_LEADER_CHECK_INTERVAL_IN_MS_DEFAULT) * (conf
            .getInt(DFSConfigKeys.DFS_LEADER_MISSED_HB_THRESHOLD_KEY,
                DFSConfigKeys.DFS_LEADER_MISSED_HB_THRESHOLD_DEFAULT) + 2);
    // create the directory namespace
    assertTrue(fs.mkdirs(baseDir));
    // create writers
    for (int i = 0; i < writers.length; i++) {
      // FIX: dropped the redundant `new String(...)` wrapper around the
      // concatenated file name.
      writers[i] = new Writer(fs, "file" + i, writeInSameDir, baseDir,
          waitFileisClosed, fileCloseWaitTime);
    }
  }

  /** Shuts the mini cluster down, tolerating the case where it never started. */
  private void shutdown() {
    if (cluster != null) {
      cluster.shutdown();
    }
  }

  @Override
  @After
  public void tearDown() throws Exception {
    // FIX: use the null-safe helper instead of cluster.shutdown() directly;
    // the direct call throws NPE when setUp() never ran (e.g. the suite is
    // @Ignore'd or cluster construction failed).
    shutdown();
  }

  @Test(timeout = 900000)
  // Problem with this test case is that, although the new pipeline is setup correctly
  // when it creates the next pipeline and starts to stream, another datanode crashes!! and then another.. etc.
  // it only retries when it detects an error while streaming to the datanodes in the pipeline
  // but not when adding a new datanode to the pipeline
  //[https://issues.apache.org/jira/browse/HDFS-3436]
  public void testFailoverWhenDNCrashesTest1() {
    // Testing with replication factor of 3
    short repFactor = 3;
    LOG.info(
        "TestNN Running test [testFailoverWhenDNCrashes()] with replication factor " +
            repFactor);
    failoverWhenDNCrashes(repFactor);
  }

  @Test(timeout = 900000)
  public void testFailoverWhenDNCrashesTest2() {
    // Testing with replication factor of 6
    short repFactor = 6;
    LOG.info(
        "Running test [testFailoverWhenDNCrashes()] with replication factor " +
            repFactor);
    failoverWhenDNCrashes(repFactor);
  }

  /**
   * Runs load (25 concurrent writers) while killing datanodes one at a time,
   * then verifies that every written file eventually reaches the requested
   * replication. Any exception fails the test; the cluster is always shut
   * down on exit.
   *
   * @param replicationFactor replication factor the cluster was set up with;
   *     callers use 3 and 6
   */
  public void failoverWhenDNCrashes(short replicationFactor) {
    try {
      // reset this value
      int numDatanodesToKill = 0;
      if (replicationFactor == 3) {
        NUM_DATANODES = 5;
        numDatanodesToKill = 2;
      } else if (replicationFactor == 6) {
        NUM_DATANODES = 11;
        numDatanodesToKill = 5;
      }
      // setup the cluster with required replication factor
      setUp(replicationFactor);
      // FIX: removed the unused local `nnport` (cluster.getNameNodePort(NN1));
      // its value was never read and no restart with the saved port happens.
      try {
        // writers start writing to their files
        Writer.startWriters(writers);
        // Give all the threads a chance to create their files and write
        // something to it
        Thread.sleep(10000); // 10 sec
        // kill some datanodes (but make sure that we have at least 6+)
        // The pipeline should be broken and set up again by the client
        for (int i = 0; i < numDatanodesToKill; i++) {
          // we need a way to ensure that at least one valid replica is
          // available. NOTE(review): the random index only ranges over
          // [0, numDatanodesToKill) rather than all live datanodes —
          // TODO confirm this is intentional.
          int dnIndex = AppendTestUtil.nextInt(numDatanodesToKill);
          LOG.debug("TestNN DataNode Killed " +
              (cluster.getDataNodes().get(dnIndex).getDatanodeId()));
          cluster.stopDataNode(dnIndex);
          // wait for 15 seconds then kill the next datanode
          // New pipeline recovery takes place
          try {
            Thread.sleep(15000);
          } catch (InterruptedException ex) {
            // FIX: restore the interrupt status instead of swallowing it,
            // so an interrupted test run can actually terminate.
            Thread.currentThread().interrupt();
          }
        }
        // the load should still continue without any IO Exception thrown
        LOG.info("TestNN Wait a few seconds. Let them write some more");
        Thread.sleep(2000);
      } finally {
        // closing files - finalize all the files UC
        Writer.stopWriters(writers);
      }
      // the block report intervals would inform the namenode of under
      // replicated blocks. hflush() and close() would guarantee replication
      // at all datanodes. This is a confirmation
      Writer.waitReplication(fs, writers, replicationFactor,
          waitReplicationTimeout);
    } catch (Exception ex) {
      LOG.error("Received exception: " + ex.getMessage(), ex);
      ex.printStackTrace();
      fail("Exception: " + ex.getMessage());
    } finally {
      shutdown();
    }
  }
}