package org.apache.hadoop.hdfs.server.namenode;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.log4j.Level;
import org.junit.Ignore;
import org.junit.Test;
import java.io.IOException;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/* We have no control over the order in which the datanodes and clients
* communicate with the namenode. Consider a scenario where a client is
* writing to a file: all the blocks have reached the minimum replication
* level, but the datanodes have not yet reported all the replicas. The
* namenode closes the file and marks the blocks that have not yet been fully
* reported as under-replicated, and the replication monitor starts the
* replication process for those blocks.
*
* In this test the number of datanodes equals the replication factor of the
* file, so the replication monitor tries to create new replicas on datanodes
* that already hold a copy (just not yet reported to the namenode). This
* results in a ReplicaAlreadyExistsException.
*
* The default placement policy also logs warnings such as
* "Not able to place enough replicas".
*
* If too many writer threads are created, the data length is sometimes
* inconsistent. In that case look for "java.io.IOException: Too many open
* files" in the log.
*/
@Ignore(value = "The design of this test needs to be reconsidered. " +
"It fails most of the time because of race conditions.")
public class TestHAFileCreation {
public static final Log LOG = LogFactory.getLog(TestHAFileCreation.class);
static {
((Log4JLogger) NameNode.stateChangeLog).getLogger().setLevel(Level.ALL);
((Log4JLogger) LeaseManager.LOG).getLogger().setLevel(Level.ALL);
((Log4JLogger) LogFactory.getLog(FSNamesystem.class)).getLogger()
.setLevel(Level.ALL);
}
Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = null;
FileSystem fs = null;
static final int NN1 = 0, NN2 = 1;
static int NUM_NAMENODES = 2;
static int NUM_DATANODES = 3;
// timeout before a dead leader namenode is declared lost; the 10 second
// default is recomputed from the configuration in setupCluster()
long NNDeathTimeout = 10000;
boolean writeInSameDir = true; // all writers write under the same base directory
boolean killNN = true; // kill the leader namenode while the writers are active
boolean waitFileisClosed = true; // writers wait until their file is closed
int fileCloseWaitTime = 5000; // ms a writer waits for its file to be closed
int waitReplicationTimeout = 5 * 60 * 1000; // ms to wait for full replication
Path baseDir = new Path("/testsLoad");
Writer[] writers = new Writer[50];
private void setupCluster(int replicationFactor) throws IOException {
// initialize the cluster with at least 2 namenodes and at least as many
// datanodes as the replication factor
if (NUM_NAMENODES < 2) {
NUM_NAMENODES = 2;
}
if (replicationFactor > NUM_DATANODES) {
NUM_DATANODES = replicationFactor;
}
this.conf = new HdfsConfiguration();
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replicationFactor);
// use a small block size so that each file spans many blocks
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
// let the replication monitor re-schedule pending replications quickly
conf.setLong(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
15);
cluster = new MiniDFSCluster.Builder(conf)
.nnTopology(MiniDFSNNTopology.simpleHOPSTopology(NUM_NAMENODES))
.format(true).numDataNodes(NUM_DATANODES).build();
cluster.waitActive();
LOG.debug(
"NN1 address is " + cluster.getNameNode(NN1).getNameNodeAddress() +
" ld: " + cluster.getNameNode(NN1).isLeader() + " NN2 address is " +
cluster.getNameNode(NN2).getNameNodeAddress() + " ld: " +
cluster.getNameNode(NN2).isLeader());
fs = cluster.getNewFileSystemInstance(NN1);
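// a leader is declared dead after it misses the configured number of
// heartbeats, plus a margin of 2, at the leader-check interval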
NNDeathTimeout =
conf.getInt(DFSConfigKeys.DFS_LEADER_CHECK_INTERVAL_IN_MS_KEY,
DFSConfigKeys.DFS_LEADER_CHECK_INTERVAL_IN_MS_DEFAULT) * (conf
.getInt(DFSConfigKeys.DFS_LEADER_MISSED_HB_THRESHOLD_KEY,
DFSConfigKeys.DFS_LEADER_MISSED_HB_THRESHOLD_DEFAULT) + 2);
// create the directory namespace
assertTrue(fs.mkdirs(baseDir));
// create writers
for (int i = 0; i < writers.length; i++) {
writers[i] = new Writer(fs, "file" + i, writeInSameDir, baseDir,
waitFileisClosed, fileCloseWaitTime);
}
}
private void shutdown() {
if (cluster != null) {
cluster.shutdown();
}
}
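// A minimal sketch of the kind of close-polling the writers are configured
// for via waitFileisClosed/fileCloseWaitTime. This helper is illustrative
// only and is not wired into the test; it assumes the Apache HDFS
// DistributedFileSystem#isFileClosed(Path) API is available in the branch
// under test.
private boolean waitUntilFileIsClosed(Path file, long timeoutMs)
throws IOException, InterruptedException {
org.apache.hadoop.hdfs.DistributedFileSystem dfs =
(org.apache.hadoop.hdfs.DistributedFileSystem) fs;
long deadline = System.currentTimeMillis() + timeoutMs;
while (System.currentTimeMillis() < deadline) {
if (dfs.isFileClosed(file)) {
return true; // namenode has completed the file and released the lease
}
Thread.sleep(100); // poll until the deadline expires
}
return false;
}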
/**
 * Under load, perform a failover by killing the leader NN1. NN2 becomes the
 * leader and the load is then processed by NN2. The load should continue
 * uninterrupted and no corrupt blocks should be reported.
 */
@Test
public void testFailoverWhenLeaderNNCrashesTest1() {
// Testing with replication factor of 3
short repFactor = 3;
LOG.info(
"Running test [testFailoverWhenLeaderNNCrashes()] with replication factor " +
repFactor);
failoverWhenLeaderNNCrashes(repFactor);
}
@Test
public void testFailoverWhenLeaderNNCrashesTest2() {
// Testing with replication factor of 6
short repFactor = 6;
LOG.info(
"Running test [testFailoverWhenLeaderNNCrashes()] with replication factor " +
repFactor);
failoverWhenLeaderNNCrashes(repFactor);
}
private void failoverWhenLeaderNNCrashes(short replicationFactor) {
try {
// setup the cluster with required replication factor
setupCluster(replicationFactor);
// save leader namenode port to restart with the same port
int nnport = cluster.getNameNodePort(NN1);
try {
// writers start writing to their files
Writer.startWriters(writers);
// give all the threads a chance to create their files and write something to them
Thread.sleep(10000);
LOG.debug("TestNN about to shutdown the namenode with address " +
cluster.getNameNode(NN1).getNameNodeAddress());
// kill leader NN1
if (killNN) {
cluster.shutdownNameNode(NN1);
LOG.debug("TestNN KILLED Namenode with address ");
TestHABasicFailover.waitLeaderElection(cluster.getDataNodes(),
cluster.getNameNode(NN2), NNDeathTimeout);
// Check NN2 is the leader and failover is detected
assertTrue("TestNN NN2 is expected to be the leader, but is not",
cluster.getNameNode(NN2).isLeader());
assertTrue("TestNN Not all datanodes detected the new leader",
TestHABasicFailover
.doesDataNodesRecognizeLeader(cluster.getDataNodes(),
cluster.getNameNode(NN2)));
}
// the load should still continue without any IO Exception thrown
LOG.info("TestNN Wait a few seconds. Let them write some more");
Thread.sleep(10000);
} finally {
Writer.stopWriters(writers);
}
LOG.debug("TestNN All File Should Have been closed");
Writer.verifyFile(writers, fs);
// periodic block reports will inform the namenode of any remaining
// under-replicated blocks; hflush() and close() guarantee the data reached
// every datanode in the pipeline, and waitReplication() confirms that the
// target replication factor has been met
Writer.waitReplication(fs, writers, replicationFactor,
waitReplicationTimeout);
// The cluster-restart verification below is disabled pending the test
// redesign noted in the @Ignore annotation.
if (true) {
return;
}
// restart the cluster without formatting, using the same port and configuration
cluster.shutdown();
cluster =
new MiniDFSCluster.Builder(conf).nameNodePort(nnport).format(false)
.nnTopology(MiniDFSNNTopology.simpleHOPSTopology(NUM_NAMENODES))
.numDataNodes(NUM_DATANODES).build();
cluster.waitActive();
// refresh the client so that it has an up-to-date list of namenodes; blacklisted namenodes are removed
fs = cluster.getNewFileSystemInstance(NN1);
// verifyFile throws IOException, which is caught by the enclosing catch block
Writer.verifyFile(writers, fs);
} catch (Exception ex) {
LOG.error("Received exception: " + ex.getMessage(), ex);
fail("Exception: " + ex.getMessage());
} finally {
shutdown();
}
}
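// The Javadoc for the failover tests states that no corrupt blocks should
// be reported, but the test never verifies this. A minimal sketch of such a
// check, assuming DistributedFileSystem#listCorruptFileBlocks(Path) is
// available in the branch under test (it is in Apache Hadoop 2.x); the
// helper is illustrative and not yet called from the test body.
private void assertNoCorruptBlocks() throws IOException {
org.apache.hadoop.hdfs.DistributedFileSystem dfs =
(org.apache.hadoop.hdfs.DistributedFileSystem) fs;
org.apache.hadoop.fs.RemoteIterator<Path> corrupt =
dfs.listCorruptFileBlocks(baseDir);
assertFalse("Corrupt blocks were reported under " + baseDir,
corrupt.hasNext());
}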
}