package io.eguan.dtx;
/*
* #%L
* Project eguan
* %%
* Copyright (C) 2012 - 2017 Oodrive
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import static io.eguan.dtx.DtxDummyRmFactory.DEFAULT_PAYLOAD;
import static io.eguan.dtx.DtxDummyRmFactory.DEFAULT_RES_UUID;
import static io.eguan.dtx.DtxMockUtils.verifyRollbackOnTx;
import static io.eguan.dtx.DtxMockUtils.verifySuccessfulTxExecution;
import static io.eguan.dtx.DtxResourceManagerState.UP_TO_DATE;
import static io.eguan.dtx.DtxTestHelper.newDtxManagerConfig;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.timeout;
import static org.mockito.Mockito.verify;
import io.eguan.dtx.DtxManager;
import io.eguan.dtx.DtxManagerConfig;
import io.eguan.dtx.DtxNode;
import io.eguan.dtx.DtxResourceManager;
import io.eguan.dtx.DtxResourceManagerContext;
import io.eguan.dtx.TransactionManager;
import io.eguan.dtx.DtxDummyRmFactory.DtxResourceManagerBuilder;
import io.eguan.dtx.DtxDummyRmFactory.NodeShutdownPrepareAnswer;
import io.eguan.dtx.DtxDummyRmFactory.NodeShutdownStartAnswer;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CyclicBarrier;
import javax.transaction.xa.XAException;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.junit.runners.model.InitializationError;
import org.mockito.InOrder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.hazelcast.core.Hazelcast;
import com.hazelcast.core.HazelcastInstance;
/**
* {@link Parameterized} test for scenarios where a part of the cluster shuts down during transaction execution.
*
* @author oodrive
* @author pwehrle
*
*/
@RunWith(Parameterized.class)
public final class TestHazelcastFailuresL {
private static final Logger LOGGER = LoggerFactory.getLogger(TestHazelcastFailuresL.class);
private static final int VERIFY_TIMEOUT_MS = 10000;
private static final int MIN_NB_OF_NODES = 2;
// maximum number of cluster nodes to test - increment if you've got time to spare
private static final int MAX_NB_OF_NODES = 4;
private final int nbOfNodes;
private final int nbOfShutdowns;
private final ArrayList<DtxManager> dtxManagers = new ArrayList<DtxManager>();
private final ArrayList<Path> tmpJournalDirs = new ArrayList<Path>();
private final ArrayList<DtxResourceManager> failResMgrList = new ArrayList<DtxResourceManager>();
private final ArrayList<DtxResourceManager> successResMgrList = new ArrayList<DtxResourceManager>();
/**
* Constructs a test instance.
*
* @param nbOfNodes
* the total number of nodes to add to the test cluster
* @param nbOfShutdowns
* the number of nodes to be shut down during transaction execution
*/
public TestHazelcastFailuresL(final Integer nbOfNodes, final Integer nbOfShutdowns) {
this.nbOfNodes = nbOfNodes.intValue();
this.nbOfShutdowns = nbOfShutdowns.intValue();
if (this.nbOfShutdowns >= this.nbOfNodes) {
throw new IllegalArgumentException("Number of shutdowns greater or equal than node count; nbOfNodes="
+ nbOfNodes + ", nbOfShutdowns=" + nbOfShutdowns);
}
if (this.nbOfShutdowns < 1) {
throw new IllegalArgumentException("Number of shutdowns must be at least 1; nbOfNodes=" + nbOfNodes
+ ", nbOfShutdowns=" + nbOfShutdowns);
}
}
/**
* Produces the parameter arrays with numbers of nodes and numbers of nodes to shut down.
*
* Note: Shutdown numbers always stop short of shutting down the whole cluster (taking care not to isolate the
* initiator).
*
* @return the list of parameter arrays
*/
@Parameters
public static final List<Object[]> data() {
final ArrayList<Object[]> result = new ArrayList<Object[]>();
for (int nbNodes = MIN_NB_OF_NODES; nbNodes <= MAX_NB_OF_NODES; nbNodes++) {
for (int nbShutdowns = 1; nbShutdowns < nbNodes; nbShutdowns++) {
result.add(new Object[] { Integer.valueOf(nbNodes), Integer.valueOf(nbShutdowns) });
}
}
return result;
}
/**
* Sets up the test cluster.
*
* @throws InitializationError
* if temporary directory creation fails
*/
@Before
public final void setUp() throws InitializationError {
final Set<DtxNode> peerList = DtxTestHelper.newRandomCluster(nbOfNodes);
// creates configurations and constructs DtxManager instances
for (final DtxNode currPeer : peerList) {
final Path journalDir;
try {
journalDir = Files.createTempDirectory(TestHazelcastFailuresL.class.getSimpleName());
tmpJournalDirs.add(journalDir);
}
catch (final IOException e) {
throw new InitializationError(e);
}
final ArrayList<DtxNode> otherPeers = new ArrayList<DtxNode>(peerList);
otherPeers.remove(currPeer);
final DtxManagerConfig dtxConfig = newDtxManagerConfig(currPeer, journalDir,
otherPeers.toArray(new DtxNode[otherPeers.size()]));
dtxManagers.add(new DtxManager(dtxConfig));
}
// initializes and starts the cluster nodes
int peerCount = 0;
for (final DtxManager currMgr : dtxManagers) {
currMgr.init();
currMgr.start();
peerCount++;
final HazelcastInstance hzInstance = Hazelcast.getHazelcastInstanceByName(currMgr.getNodeId().toString());
assertNotNull(hzInstance);
assertEquals(peerCount, hzInstance.getCluster().getMembers().size());
}
}
/**
* Tears down the test cluster.
*
* @throws InitializationError
* if file cleanup fails
*/
@After
public final void tearDown() throws InitializationError {
final ArrayList<Throwable> exceptionList = new ArrayList<Throwable>();
for (final DtxManager currMgr : dtxManagers) {
assertEquals(0, currMgr.getNbOfPendingRequests());
currMgr.stop();
currMgr.fini();
}
for (final Path currDir : tmpJournalDirs) {
try {
io.eguan.utils.Files.deleteRecursive(currDir);
}
catch (final IOException e) {
exceptionList.add(e);
}
}
if (!exceptionList.isEmpty()) {
throw new InitializationError(exceptionList);
}
}
/**
* Tests one transaction rolled back or partially committed due to a variable number of peers being shut down during
* the start phase.
*
* @throws XAException
* if setting up mock fails, not part of this test
*/
@Test
public final void testSubmitOneTxWithPeersShutdownOnStart() throws XAException {
LOGGER.info("Executing; nbOfNodes=" + this.nbOfNodes + ", nbOfShutdowns=" + this.nbOfShutdowns);
assertFalse(dtxManagers.isEmpty());
// adds resource managers with a failing proportion
int failCount = 0;
final CyclicBarrier syncBarrier = new CyclicBarrier(nbOfShutdowns);
for (final DtxManager currMgr : dtxManagers) {
final DtxResourceManager dtxResMgr;
if (failCount < this.nbOfShutdowns) {
final HazelcastInstance hzInstance = Hazelcast.getHazelcastInstanceByName(currMgr.getNodeId()
.toString());
assertNotNull(hzInstance);
dtxResMgr = new DtxResourceManagerBuilder().setId(DEFAULT_RES_UUID)
.setStart(null, new NodeShutdownStartAnswer(hzInstance.getLifecycleService(), syncBarrier))
.build();
failResMgrList.add(dtxResMgr);
failCount++;
}
else {
dtxResMgr = DtxDummyRmFactory.newResMgrThatDoesEverythingRight(DEFAULT_RES_UUID);
successResMgrList.add(dtxResMgr);
}
currMgr.registerResourceManager(dtxResMgr);
final TransactionManager currTxMgr = currMgr.getTxManager();
currTxMgr.setResManagerSyncState(DEFAULT_RES_UUID, UP_TO_DATE);
}
// submits to the last node, i.e. the one that will not be shut down
final DtxManager submitDtxMgr = dtxManagers.get(dtxManagers.size() - 1);
submitDtxMgr.submit(DEFAULT_RES_UUID, DEFAULT_PAYLOAD);
for (final DtxResourceManager failingResMgr : failResMgrList) {
verify(failingResMgr, timeout(VERIFY_TIMEOUT_MS)).start(any(byte[].class));
final InOrder inOrderFail = inOrder(failingResMgr);
inOrderFail.verify(failingResMgr).start(any(byte[].class));
inOrderFail.verifyNoMoreInteractions();
}
final boolean quorumOnline = submitDtxMgr.countsAsQuorum(nbOfNodes - failCount);
for (final DtxResourceManager succeedingResMgr : successResMgrList) {
if (!quorumOnline) {
LOGGER.debug("verifying rollback on res. manager nb " + successResMgrList.indexOf(succeedingResMgr));
verifyRollbackOnTx(succeedingResMgr, 1, false);
}
else {
LOGGER.debug("verifying commit on res. manager nb " + successResMgrList.indexOf(succeedingResMgr));
verifySuccessfulTxExecution(succeedingResMgr, 1);
}
}
}
/**
* Tests one transaction rolled back or partially committed due to a variable number of peers being shut down during
* the prepare phase.
*
* @throws XAException
* if setting up mock fails, not part of this test
*/
@Test
public final void testSubmitOneTxWithPeersShutdownOnPrepare() throws XAException {
LOGGER.info("Executing; nbOfNodes=" + this.nbOfNodes + ", nbOfShutdowns=" + this.nbOfShutdowns);
assertFalse(dtxManagers.isEmpty());
// adds resource managers with a failing proportion
int failCount = 0;
final CyclicBarrier syncBarrier = new CyclicBarrier(nbOfShutdowns);
for (final DtxManager currMgr : dtxManagers) {
final DtxResourceManager dtxResMgr;
if (failCount < this.nbOfShutdowns) {
final HazelcastInstance hzInstance = Hazelcast.getHazelcastInstanceByName(currMgr.getNodeId()
.toString());
assertNotNull(hzInstance);
dtxResMgr = new DtxResourceManagerBuilder().setId(DEFAULT_RES_UUID)
.setPrepare(null, new NodeShutdownPrepareAnswer(hzInstance.getLifecycleService(), syncBarrier))
.build();
failResMgrList.add(dtxResMgr);
failCount++;
}
else {
dtxResMgr = DtxDummyRmFactory.newResMgrThatDoesEverythingRight(DEFAULT_RES_UUID);
successResMgrList.add(dtxResMgr);
}
currMgr.registerResourceManager(dtxResMgr);
final TransactionManager currTxMgr = currMgr.getTxManager();
currTxMgr.setResManagerSyncState(DEFAULT_RES_UUID, UP_TO_DATE);
}
// submits to the last node, i.e. the one that will not be shut down
final DtxManager submitDtxMgr = dtxManagers.get(dtxManagers.size() - 1);
submitDtxMgr.submit(DEFAULT_RES_UUID, DEFAULT_PAYLOAD);
for (final DtxResourceManager failingResMgr : failResMgrList) {
verify(failingResMgr, timeout(VERIFY_TIMEOUT_MS)).prepare(any(DtxResourceManagerContext.class));
final InOrder inOrderFail = inOrder(failingResMgr);
inOrderFail.verify(failingResMgr).start(any(byte[].class));
inOrderFail.verify(failingResMgr).prepare(any(DtxResourceManagerContext.class));
inOrderFail.verifyNoMoreInteractions();
}
final boolean quorumOnline = submitDtxMgr.countsAsQuorum(nbOfNodes - failCount);
for (final DtxResourceManager succeedingResMgr : successResMgrList) {
if (!quorumOnline) {
LOGGER.debug("verifying rollback on res. manager nb " + successResMgrList.indexOf(succeedingResMgr));
verifyRollbackOnTx(succeedingResMgr, 1, true);
}
else {
LOGGER.debug("verifying commit on res. manager nb " + successResMgrList.indexOf(succeedingResMgr));
verifySuccessfulTxExecution(succeedingResMgr, 1);
}
}
}
}