/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import junit.framework.TestCase;
import org.junit.Test;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.util.InjectionEventI;
import org.apache.hadoop.util.InjectionHandler;
public class TestAutoEditRollWhenAvatarFailover extends AvatarSetupUtil {
final static Log LOG = LogFactory.getLog(TestAutoEditRollWhenAvatarFailover.class);
/**
* Test if we can get block locations after killing primary avatar,
* failing over to standby avatar (making it the new primary),
* restarting a new standby avatar, killing the new primary avatar and
* failing over to the restarted standby.
*
* Write logs for a while to make sure automatic rolling are triggered.
*/
@Test
public void testDoubleFailOverWithAutomaticRoll() throws Exception {
setUp(false, "testDoubleFailOverWithAutomaticRoll");
// To make sure it's never the case that both primary and standby
// issue rolling, we use a injection handler.
final AtomicBoolean startKeepThread = new AtomicBoolean(true);
final AtomicInteger countAutoRolled = new AtomicInteger(0);
final AtomicBoolean needFail = new AtomicBoolean(false);
final AtomicLong currentThreadId = new AtomicLong(-1);
final Object waitFor10Rolls = new Object();
InjectionHandler.set(new InjectionHandler() {
@Override
protected void _processEvent(InjectionEventI event, Object... args) {
if (event == InjectionEvent.FSEDIT_AFTER_AUTOMATIC_ROLL) {
countAutoRolled.incrementAndGet();
if (countAutoRolled.get() >= 10) {
synchronized (waitFor10Rolls) {
waitFor10Rolls.notifyAll();
}
}
if (!startKeepThread.get()) {
currentThreadId.set(-1);
} else if (currentThreadId.get() == -1) {
currentThreadId.set(Thread.currentThread().getId());
} else if (currentThreadId.get() != Thread.currentThread().getId()) {
LOG.warn("[Thread " + Thread.currentThread().getId()
+ "] expected: " + currentThreadId);
needFail.set(true);
}
LOG.info("[Thread " + Thread.currentThread().getId()
+ "] finish automatic log rolling, count "
+ countAutoRolled.get());
// Increase the rolling time a little bit once after 7 auto rolls
if (countAutoRolled.get() % 7 == 3) {
DFSTestUtil.waitNMilliSecond(75);
}
}
}
});
FileSystem fs = cluster.getFileSystem();
// Add some transactions during a period of time before failing over.
long startTime = System.currentTimeMillis();
for (int i = 0; i < 100; i++) {
fs.setTimes(new Path("/"), 0, 0);
DFSTestUtil.waitNMilliSecond(100);
if (i % 10 == 0) {
LOG.info("================== executed " + i + " queries");
}
if (countAutoRolled.get() >= 10) {
LOG.info("Automatic rolled 10 times.");
long duration = System.currentTimeMillis() - startTime;
TestCase.assertTrue("Automatic rolled 10 times in just " + duration
+ " msecs, which is too short", duration > 4500);
break;
}
}
TestCase.assertTrue("Only " + countAutoRolled
+ " automatic rolls triggered, which is lower than expected.",
countAutoRolled.get() >= 10);
// Tune the rolling timeout temporarily to avoid race conditions
// only triggered in tests
cluster.getPrimaryAvatar(0).avatar.namesystem.getFSImage().getEditLog()
.setTimeoutRollEdits(5000);
cluster.getStandbyAvatar(0).avatar.namesystem.getFSImage().getEditLog()
.setTimeoutRollEdits(5000);
LOG.info("================== killing primary 1");
cluster.killPrimary();
// Fail over and make sure after fail over, automatic edits roll still
// will happen.
countAutoRolled.set(0);
startKeepThread.set(false);
currentThreadId.set(-1);
LOG.info("================== failing over 1");
cluster.failOver();
cluster.getPrimaryAvatar(0).avatar.namesystem.getFSImage().getEditLog()
.setTimeoutRollEdits(1000);
LOG.info("================== restarting standby");
cluster.restartStandby();
cluster.getStandbyAvatar(0).avatar.namesystem.getFSImage().getEditLog()
.setTimeoutRollEdits(1000);
LOG.info("================== Finish restarting standby");
// Wait for automatic rolling happens if there is no new transaction.
startKeepThread.set(true);
startTime = System.currentTimeMillis();
long waitDeadLine = startTime + 20000;
synchronized (waitFor10Rolls) {
while (System.currentTimeMillis() < waitDeadLine
&& countAutoRolled.get() < 10) {
waitFor10Rolls.wait(waitDeadLine - System.currentTimeMillis());
}
}
TestCase.assertTrue("Only " + countAutoRolled
+ " automatic rolls triggered, which is lower than expected.",
countAutoRolled.get() >= 10);
long duration = System.currentTimeMillis() - startTime;
TestCase.assertTrue("Automatic rolled 10 times in just " + duration
+ " msecs", duration > 9000);
// failover back
countAutoRolled.set(0);
startKeepThread.set(false);
currentThreadId.set(-1);
cluster.getPrimaryAvatar(0).avatar.namesystem.getFSImage().getEditLog()
.setTimeoutRollEdits(6000);
cluster.getStandbyAvatar(0).avatar.namesystem.getFSImage().getEditLog()
.setTimeoutRollEdits(6000);
LOG.info("================== killing primary 2");
cluster.killPrimary();
LOG.info("================== failing over 2");
cluster.failOver();
cluster.getPrimaryAvatar(0).avatar.namesystem.getFSImage().getEditLog()
.setTimeoutRollEdits(1000);
// Make sure after failover back, automatic rolling can still happen.
startKeepThread.set(true);
for (int i = 0; i < 100; i++) {
fs.setTimes(new Path("/"), 0, 0);
DFSTestUtil.waitNMilliSecond(200);
if (i % 10 == 0) {
LOG.info("================== executed " + i + " queries");
}
if (countAutoRolled.get() > 10) {
LOG.info("Automatic rolled 10 times.");
duration = System.currentTimeMillis() - startTime;
TestCase.assertTrue("Automatic rolled 10 times in just " + duration
+ " msecs, which is too short", duration > 9000);
break;
}
}
TestCase.assertTrue("Only " + countAutoRolled
+ " automatic rolls triggered, which is lower than expected.",
countAutoRolled.get() >= 10);
InjectionHandler.clear();
if (needFail.get()) {
TestCase
.fail("Automatic rolling doesn't happen in the same thread when should.");
}
}
}