/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.master;
import alluxio.AlluxioURI;
import alluxio.AuthenticatedUserRule;
import alluxio.Configuration;
import alluxio.ConfigurationTestUtils;
import alluxio.Constants;
import alluxio.PropertyKey;
import alluxio.SystemPropertyRule;
import alluxio.BaseIntegrationTest;
import alluxio.client.WriteType;
import alluxio.client.block.BlockWorkerClientTestUtils;
import alluxio.client.file.FileSystem;
import alluxio.client.file.FileSystemContext;
import alluxio.master.file.FileSystemMaster;
import alluxio.master.file.options.ListStatusOptions;
import alluxio.util.CommonUtils;
import alluxio.util.IdUtils;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
 * Tests the master journal when the cluster is terminated. Asserts that a file system master
 * created from the journal can replay it and reproduce the correct state. Covers both the single
 * master (alluxio) and the multi master (alluxio-ft) setups.
 */
public class JournalShutdownIntegrationTest extends BaseIntegrationTest {
  /** Authenticated user rule, constructed with the user name "test". */
  @Rule
  public AuthenticatedUserRule mAuthenticatedUser = new AuthenticatedUserRule("test");

  /** Maximum time to wait for the client thread to terminate once it has been interrupted. */
  private static final long SHUTDOWN_TIME_MS = 15 * Constants.SECOND_MS;
  /** Directory prefix under which the client thread creates its files. */
  private static final String TEST_FILE_DIR = "/files/";
  /** Number of masters in the multi-master cluster; also the number of leaders stopped. */
  private static final int TEST_NUM_MASTERS = 3;
  /** Sleep interval used while the client thread is creating files. */
  private static final long TEST_TIME_MS = Constants.SECOND_MS;

  /** The task that keeps creating files; set by {@link #runCreateFileThread(FileSystem)}. */
  private ClientThread mCreateFileThread;
  /** Executor for running client threads. */
  private ExecutorService mExecutorsForClient;

  /** System property rule for "fs.hdfs.impl.disable.cache" with the value "true". */
  @ClassRule
  public static SystemPropertyRule sDisableHdfsCacheRule =
      new SystemPropertyRule("fs.hdfs.impl.disable.cache", "true");

  /**
   * Stops the client executor and resets the global state touched by the test.
   */
  @After
  public final void after() throws Exception {
    // shutdownNow() interrupts a still-running client task. A plain shutdown() would leave the
    // endless create-file loop alive whenever a test fails before awaitClientTermination().
    mExecutorsForClient.shutdownNow();
    ConfigurationTestUtils.resetConfiguration();
    BlockWorkerClientTestUtils.reset();
    FileSystemContext.INSTANCE.reset();
  }

  /**
   * Creates the single-threaded client executor and configures the journal properties.
   */
  @Before
  public final void before() throws Exception {
    mExecutorsForClient = Executors.newFixedThreadPool(1);
    // NOTE(review): the small values below presumably force frequent checkpoints and journal log
    // rotation while files are being created -- confirm against the property documentation.
    Configuration.set(PropertyKey.MASTER_JOURNAL_TAILER_SHUTDOWN_QUIET_WAIT_TIME_MS, 100);
    Configuration.set(PropertyKey.MASTER_JOURNAL_CHECKPOINT_PERIOD_ENTRIES, 2);
    Configuration.set(PropertyKey.MASTER_JOURNAL_LOG_SIZE_BYTES_MAX, 32);
  }

  /**
   * Stops a single-master cluster while a client is creating files, then verifies that the
   * state rebuilt from the journal contains the files which were created successfully.
   */
  @Test
  public void singleMasterJournalStopIntegration() throws Exception {
    LocalAlluxioCluster cluster = setupSingleMasterCluster();
    runCreateFileThread(cluster.getClient());
    // Shutdown the cluster
    cluster.stopFS();
    CommonUtils.sleepMs(TEST_TIME_MS);
    awaitClientTermination();
    reproduceAndCheckState(mCreateFileThread.getSuccessNum());
    // clean up
    cluster.stopUFS();
  }

  /**
   * Stops the leader of a multi-master cluster {@link #TEST_NUM_MASTERS} times while a client is
   * creating files, then verifies that the state rebuilt from the journal contains the files
   * which were created successfully.
   */
  @Test
  public void multiMasterJournalStopIntegration() throws Exception {
    MultiMasterLocalAlluxioCluster cluster = setupMultiMasterCluster();
    runCreateFileThread(cluster.getClient());
    // Kill the leader one by one.
    for (int kills = 0; kills < TEST_NUM_MASTERS; kills++) {
      cluster.waitForNewMaster(120 * Constants.SECOND_MS);
      Assert.assertTrue("Failed to stop the leader on kill #" + kills, cluster.stopLeader());
    }
    cluster.stopFS();
    awaitClientTermination();
    reproduceAndCheckState(mCreateFileThread.getSuccessNum());
    // clean up
    cluster.stopUFS();
  }

  /**
   * Interrupts the client thread and waits up to {@link #SHUTDOWN_TIME_MS} for it to terminate.
   * Fails the test if it does not terminate in time.
   */
  private void awaitClientTermination() throws Exception {
    // Ensure the client threads are stopped.
    mExecutorsForClient.shutdownNow();
    if (!mExecutorsForClient.awaitTermination(SHUTDOWN_TIME_MS, TimeUnit.MILLISECONDS)) {
      Assert.fail("Client thread did not terminate within " + SHUTDOWN_TIME_MS + "ms");
    }
  }

  /**
   * Creates file system master from journal.
   *
   * @return the registry returned by the master test utility
   */
  private MasterRegistry createFsMasterFromJournal() throws Exception {
    return MasterTestUtils.createLeaderFileSystemMasterFromJournal();
  }

  /**
   * Reproduces the state from the journal and checks that it is correct.
   *
   * @param successFiles the number of files the client counted as successfully created
   */
  private void reproduceAndCheckState(int successFiles) throws Exception {
    Assert.assertNotEquals("The client thread did not create any file", 0, successFiles);
    MasterRegistry registry = createFsMasterFromJournal();
    try {
      FileSystemMaster fsMaster = registry.get(FileSystemMaster.class);
      int actualFiles =
          fsMaster.listStatus(new AlluxioURI(TEST_FILE_DIR), ListStatusOptions.defaults())
              .size();
      // One extra file is tolerated: a create may have taken effect on the master although the
      // client did not count it as a success.
      Assert.assertTrue(
          "Expected " + successFiles + " or " + (successFiles + 1) + " files under "
              + TEST_FILE_DIR + " but found " + actualFiles,
          (successFiles == actualFiles) || (successFiles + 1 == actualFiles));
      for (int f = 0; f < successFiles; f++) {
        Assert.assertTrue("File " + TEST_FILE_DIR + f + " is missing after journal replay",
            fsMaster.getFileId(new AlluxioURI(TEST_FILE_DIR + f)) != IdUtils.INVALID_FILE_ID);
      }
    } finally {
      // Stop the registry even when an assertion above fails.
      registry.stop();
    }
  }

  /**
   * Sets up and starts a multi-master cluster.
   *
   * @return the started cluster
   */
  private MultiMasterLocalAlluxioCluster setupMultiMasterCluster() throws Exception {
    // Setup and start the alluxio-ft cluster.
    MultiMasterLocalAlluxioCluster cluster =
        new MultiMasterLocalAlluxioCluster(TEST_NUM_MASTERS);
    cluster.initConfiguration();
    cluster.start();
    return cluster;
  }

  /**
   * Sets up and starts a single master cluster.
   *
   * @return the started cluster
   */
  private LocalAlluxioCluster setupSingleMasterCluster() throws Exception {
    // Setup and start the local alluxio cluster.
    LocalAlluxioCluster cluster = new LocalAlluxioCluster();
    cluster.initConfiguration();
    Configuration.set(PropertyKey.USER_FILE_WRITE_TYPE_DEFAULT, WriteType.MUST_CACHE);
    cluster.start();
    return cluster;
  }

  /**
   * Starts a file-creating thread and runs it for some time, at least until it has created one
   * file. Fails the test if the thread stops before it has created any file.
   *
   * @param fs a file system client to use for creating files
   */
  private void runCreateFileThread(FileSystem fs) {
    mCreateFileThread = new ClientThread(0, fs);
    mExecutorsForClient.submit(mCreateFileThread);
    CommonUtils.sleepMs(TEST_TIME_MS);
    while (mCreateFileThread.getSuccessNum() == 0) {
      // The finished flag is read before the counter is re-read: the flag is only set after the
      // last counter update, so a zero count here is final and waiting longer cannot help.
      if (mCreateFileThread.isFinished() && mCreateFileThread.getSuccessNum() == 0) {
        Assert.fail("The client thread stopped before creating any file");
      }
      CommonUtils.sleepMs(TEST_TIME_MS);
    }
  }

  /**
   * Hold a client and keep creating files.
   */
  class ClientThread implements Runnable {
    /**
     * The number of successfully created files. Written only by the thread executing
     * {@link #run()} and read by the test thread, hence volatile.
     */
    private volatile int mSuccessNum = 0;
    /** Set once {@link #run()} has exited, for whatever reason. */
    private volatile boolean mFinished = false;
    private final int mOpType; // 0: create file
    private final FileSystem mFileSystem;

    /**
     * Constructs the client thread.
     *
     * @param opType the create operation type
     * @param fs a file system client to use for creating files
     */
    public ClientThread(int opType, FileSystem fs) {
      mOpType = opType;
      mFileSystem = fs;
    }

    /**
     * Gets the number of files which are successfully created.
     *
     * @return the number of files successfully created
     */
    public int getSuccessNum() {
      return mSuccessNum;
    }

    /**
     * Tells whether the file-creating loop has exited.
     *
     * @return true if {@link #run()} has returned, false otherwise
     */
    public boolean isFinished() {
      return mFinished;
    }

    /**
     * Keeps creating files until the thread is interrupted, something crashes or a create fails.
     * Records how many files are created successfully.
     */
    @Override
    public void run() {
      try {
        // This loop is broken when the thread is interrupted, something crashes or a create
        // fails. This is expected since the master will shutdown at a certain time.
        while (!Thread.interrupted()) {
          if (mOpType == 0) {
            try {
              mFileSystem.createFile(new AlluxioURI(TEST_FILE_DIR + mSuccessNum)).close();
            } catch (IOException e) {
              break;
            }
          } else if (mOpType == 1) {
            // TODO(gene): Add raw table creation back when there is new RawTable client API.
          }
          // The create operation may succeed at the master side but still fail at the client
          // due to the shutdown. So the mSuccessNum may be less than the actual success number.
          // This thread is the only writer, so the non-atomic increment is safe.
          mSuccessNum++;
          CommonUtils.sleepMs(100);
        }
      } catch (Exception ignored) {
        // Something crashed, which is expected while the cluster is shutting down. Stop the
        // thread.
      } finally {
        mFinished = true;
      }
    }
  }
}