/*
 * Copyright (C) 2015 hops.io.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.hops.leaderElection;

import io.hops.exception.StorageException;
import io.hops.exception.StorageInitializtionException;
import io.hops.leaderElection.experiments.LightWeightNameNode;
import io.hops.metadata.LEStorageFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.log4j.Level;
import org.apache.log4j.LogManager;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import static org.junit.Assert.assertTrue;

/**
 * Churn test for the leader election protocol: name nodes are repeatedly
 * suspended, resumed and added while the test checks that there is never more
 * than one leader and that a leader shows up within a bounded time.
 */
public class TestLEChurn {

  private static final Log LOG = LogFactory.getLog(TestLEChurn.class);

  /** Every name node ever started by the test; all of them are stopped in {@link #tearDown()}. */
  List<LightWeightNameNode> nnList;

  private static final int DFS_LEADER_CHECK_INTERVAL_IN_MS = 3 * 1000;
  private static final int DFS_LEADER_MISSED_HB_THRESHOLD = 2;
  private static final String HTTP_ADDRESS = "dummy.address.com:9999";
  private static final String RPC_ADDRESS = "repc.server.ip:0000";
  private static final String DRIVER_JAR = "";
  private static final String DRIVER_CLASS =
      "io.hops.metadata.ndb.NdbStorageFactory";
  private static final String DFS_STORAGE_DRIVER_CONFIG_FILE =
      "ndb-config.properties";
  private static final long TIME_PERIOD_INCREMENT = 200;

  /** Number of name nodes started before the churn begins. */
  private static final int INITIAL_NN_COUNT = 10;
  /** Upper bound on the total number of name nodes started during one run. */
  private static final int MAX_STARTED_NN = 100;
  /** Exclusive upper bound on the number of name nodes added per churn round. */
  private static final int MAX_NEW_NN_PER_ROUND = 10;
  /** Wall-clock duration of the churn loop. */
  private static final long TEST_DURATION_MS = 5 * 60 * 1000;
  /** Pause between suspending nodes and starting/resuming others. */
  private static final long PAUSE_AFTER_SUSPEND_MS = 5000;

  /**
   * Raises the root log level to ALL, creates the empty name node registry,
   * then configures and formats the leader election storage and registers the
   * HDFS default variable values.
   */
  @Before
  public void init()
      throws StorageInitializtionException, StorageException, IOException {
    LogManager.getRootLogger().setLevel(Level.ALL);
    nnList = new ArrayList<LightWeightNameNode>();
    LEStorageFactory.setConfiguration(DRIVER_JAR, DRIVER_CLASS,
        DFS_STORAGE_DRIVER_CONFIG_FILE);
    LEStorageFactory.formatStorage();
    VarsRegister.registerHdfsDefaultValues();
  }

  /** Stops every name node that was started during the test. */
  @After
  public void tearDown() {
    //stop all NN
    LOG.debug("tearDown");
    for (LightWeightNameNode nn : nnList) {
      nn.stop();
    }
  }

  /**
   * Test leader election behavior under churn: start 10 NN then randomly stop,
   * restart existing NN and start new ones. Check that there is always at most
   * 1 leader and that the time without leader is never higher than expected.
   */
  @Test
  public void testChurn()
      throws IOException, InterruptedException, CloneNotSupportedException {
    LOG.debug("start testChurn");
    // Fixed seed: the sequence of random decisions is the same on every run.
    Random rand = new Random(0);
    List<LightWeightNameNode> activNNList =
        new ArrayList<LightWeightNameNode>();
    List<LightWeightNameNode> stopedNNList =
        new ArrayList<LightWeightNameNode>();

    int nbStartedNodes = 0;
    //create the initial NN
    for (int i = 0; i < INITIAL_NN_COUNT; i++) {
      nbStartedNodes++;
      activNNList.add(startNameNode());
    }

    //verify that there is one and only one leader.
    int nbLeaders = collectLeaderIds().size();
    assertTrue("there is no leader", nbLeaders > 0);
    assertTrue("there is more than one leader", nbLeaders == 1);

    long startingTime = System.currentTimeMillis();
    while (System.currentTimeMillis() - startingTime < TEST_DURATION_MS) {
      //stop random number of random NN
      // The bound is size - 1, so at most size - 2 nodes are suspended and at
      // least two nodes stay active after this step.
      int nbStop = rand.nextInt(activNNList.size() - 1);
      int nbSuspended = 0;
      // Built fresh every round so the log line only lists this round's ids.
      StringBuilder suspendedIds = new StringBuilder();
      for (int i = 0; i < nbStop; i++) {
        int nnId = rand.nextInt(activNNList.size());
        LightWeightNameNode victim = activNNList.get(nnId);
        if (victim.getLeaderElectionInstance().isPaused()) {
          // Already paused: leave it in the active list and do not report it
          // as suspended by this round.
          continue;
        }
        suspendedIds.append(victim.getLeCurrentId()).append("; ");
        LOG.debug("Test : pausing " + victim.getLeCurrentId());
        victim.getLeaderElectionInstance().pauseFor(Long.MAX_VALUE);
        stopedNNList.add(victim);
        // nnId is an int, so this is remove(int index), not remove(Object).
        activNNList.remove(nnId);
        nbSuspended++;
      }
      LOG.debug("suspended " + nbSuspended + " with ids: " + suspendedIds);

      // NOTE(review): the original comment here read "after dead locks";
      // presumably the pause lets the remaining nodes notice the suspended
      // ones before the membership changes again -- confirm.
      Thread.sleep(PAUSE_AFTER_SUSPEND_MS);

      //start random number of new NN
      int nbStart = rand.nextInt(MAX_NEW_NN_PER_ROUND);
      if (nbStartedNodes + nbStart > MAX_STARTED_NN) {
        nbStart = MAX_STARTED_NN - nbStartedNodes;
      }
      for (int i = 0; i < nbStart; i++) {
        nbStartedNodes++;
        activNNList.add(startNameNode());
      }

      //restart a random number of stoped NN
      // Random.nextInt(0) throws IllegalArgumentException, so an empty stopped
      // list must be handled explicitly.
      int nbRestart =
          stopedNNList.isEmpty() ? 0 : rand.nextInt(stopedNNList.size());
      StringBuilder restartedIds = new StringBuilder();
      for (int i = 0; i < nbRestart; i++) {
        int nnId = rand.nextInt(stopedNNList.size());
        LightWeightNameNode revived = stopedNNList.get(nnId);
        restartedIds.append(revived.getLeCurrentId()).append("; ");
        revived.getLeaderElectionInstance().forceResume();
        activNNList.add(revived);
        stopedNNList.remove(nnId);
      }
      LOG.debug("restarted nodes with ids: " + restartedIds);

      //verify that there is at most one leader.
      //check that the time without leader is not too long
      long startWaitingForLeader = System.currentTimeMillis();
      do {
        List<Long> leadersID = collectLeaderIds();
        nbLeaders = leadersID.size();
        if (nbLeaders > 1) {
          StringBuilder ids = new StringBuilder(" ");
          for (long id : leadersID) {
            ids.append(id).append(" ");
          }
          assertTrue(
              "there is more than one leader " + nbLeaders + "leaders: " + ids,
              nbLeaders <= 1);
        }
        long timeWithoutLeader =
            System.currentTimeMillis() - startWaitingForLeader;
        // NOTE(review): the original comment said that, due to lock upgrade,
        // the upper time has no limit; the bound below is the one the test
        // actually enforces -- confirm it is still the intended one.
        assertTrue("the time without leader is too long " + timeWithoutLeader,
            timeWithoutLeader < ((DFS_LEADER_CHECK_INTERVAL_IN_MS *
                (DFS_LEADER_MISSED_HB_THRESHOLD + 5)) + 500));
      } while (nbLeaders == 0);
    }
  }

  /**
   * Creates a name node with the test's fixed leader election settings and
   * records it in {@link #nnList} so that {@link #tearDown()} stops it.
   *
   * @return the newly created name node
   */
  private LightWeightNameNode startNameNode()
      throws IOException, InterruptedException, CloneNotSupportedException {
    LightWeightNameNode nn =
        new LightWeightNameNode(new HdfsLeDescriptorFactory(),
            DFS_LEADER_CHECK_INTERVAL_IN_MS, DFS_LEADER_MISSED_HB_THRESHOLD,
            TIME_PERIOD_INCREMENT, HTTP_ADDRESS, RPC_ADDRESS);
    nnList.add(nn);
    return nn;
  }

  /**
   * Collects the current ids of all registered name nodes that report
   * themselves as leader.
   *
   * @return the ids of the self-declared leaders, in registration order
   */
  private List<Long> collectLeaderIds()
      throws IOException, InterruptedException, CloneNotSupportedException {
    List<Long> leaderIds = new ArrayList<Long>();
    for (LightWeightNameNode nn : nnList) {
      if (nn.isLeader()) {
        leaderIds.add(nn.getLeCurrentId());
      }
    }
    return leaderIds;
  }
}