/*
* ProActive Parallel Suite(TM):
* The Open Source library for parallel and distributed
* Workflows & Scheduling, Orchestration, Cloud Automation
* and Big Data Analysis on Enterprise Grids & Clouds.
*
* Copyright (c) 2007 - 2017 ActiveEon
* Contact: contact@activeeon.com
*
* This library is free software: you can redistribute it and/or
* modify it under the terms of the GNU Affero General Public License
* as published by the Free Software Foundation: version 3 of
* the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* If needed, contact us to obtain a release under GPL Version 2 or 3
* or a different license than the AGPL.
*/
package functionaltests.job.error;
import static functionaltests.utils.SchedulerTHelper.log;
import static org.junit.Assert.*;
import java.io.File;
import java.io.Serializable;
import java.nio.file.Path;
import org.junit.BeforeClass;
import org.junit.Test;
import org.ow2.proactive.resourcemanager.common.NodeState;
import org.ow2.proactive.resourcemanager.common.event.RMEventType;
import org.ow2.proactive.resourcemanager.common.event.RMNodeEvent;
import org.ow2.proactive.scheduler.common.Scheduler;
import org.ow2.proactive.scheduler.common.job.JobId;
import org.ow2.proactive.scheduler.common.job.JobResult;
import org.ow2.proactive.scheduler.common.job.TaskFlowJob;
import org.ow2.proactive.scheduler.common.task.JavaTask;
import org.ow2.proactive.scheduler.common.task.OnTaskError;
import org.ow2.proactive.scheduler.common.task.TaskResult;
import org.ow2.proactive.scheduler.common.task.executable.JavaExecutable;
import org.ow2.proactive.scheduler.util.FileLock;
import functionaltests.utils.SchedulerFunctionalTestWithCustomConfigAndRestart;
import functionaltests.utils.SchedulerTHelper;
import functionaltests.utils.TestNode;
/**
* Test checks that scheduler restarts task if Node executing
* task was killed during task execution.
*
* @author ProActive team
*
*/
public class TestTaskRestartOnNodeFailure extends SchedulerFunctionalTestWithCustomConfigAndRestart {
private static final long TIMEOUT = 60000;
public static class TestJavaTask extends JavaExecutable {
private String fileLockPath;
@Override
public Serializable execute(TaskResult... results) throws Throwable {
getOut().println("OK");
FileLock.waitUntilUnlocked(fileLockPath);
return "OK";
}
}
/**
* Starts an scheduler in non-fork mode with an empty rm
*
* @throws Exception
*/
@BeforeClass
public static void startDedicatedScheduler() throws Exception {
schedulerHelper = new SchedulerTHelper(false,
new File(SchedulerTHelper.class.getResource("/functionaltests/config/scheduler-nonforkedscripttasks.ini")
.toURI()).getAbsolutePath(),
null,
null);
}
@Test
public void testRestart() throws Exception {
FileLock fileLock = new FileLock();
testTaskKillNode(fileLock, false);
testTaskKillNode(fileLock, true);
}
private void testTaskKillNode(FileLock fileLock, boolean waitBeforeKill) throws Exception {
Path fileLockPath = fileLock.lock();
TestNode nodeToKill = startNode();
log("Submit job");
final JobId jobId = schedulerHelper.submitJob(createJob(fileLockPath.toString()));
log("Wait when node becomes busy");
RMNodeEvent event;
do {
event = schedulerHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED, TIMEOUT);
} while (!event.getNodeState().equals(NodeState.BUSY));
log("Wait when task starts");
schedulerHelper.waitForEventTaskRunning(jobId, "Test task");
/*
* Want to test two cases (existed at the time of this writing): - if wait some time before
* killing node then node failure is detected by the pinger thread - if kill node
* immediately then node failure is detected by the thread calling TaskLauncher.doTask
*/
if (waitBeforeKill) {
log("Wait some time");
Thread.sleep(5000);
}
log("Stop task node process (node " + nodeToKill.getNode().getNodeInformation().getURL() + ")");
nodeToKill.kill();
TestNode newNode = startNode();
log("Let task finish");
fileLock.unlock();
log("Wait when job finish");
schedulerHelper.waitForEventJobFinished(jobId, TIMEOUT);
event = schedulerHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED,
newNode.getNode().getNodeInformation().getURL(),
TIMEOUT);
assertEquals(NodeState.BUSY, event.getNodeState());
event = schedulerHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED,
newNode.getNode().getNodeInformation().getURL(),
TIMEOUT);
assertEquals(NodeState.FREE, event.getNodeState());
log("Check job result");
checkJobResult(schedulerHelper.getSchedulerInterface(), jobId);
schedulerHelper.getResourceManager().removeNode(newNode.getNodeURL(), true);
newNode.kill();
}
private static int startedNodesCounter;
private TestNode startNode() throws Exception {
int nodeNumber = startedNodesCounter++;
log("Start new node: node-" + nodeNumber);
testNode = schedulerHelper.createNode("node" + nodeNumber);
String nodeUrl = testNode.getNode().getNodeInformation().getURL();
schedulerHelper.getResourceManager().addNode(nodeUrl);
schedulerHelper.waitForNodeEvent(RMEventType.NODE_ADDED, nodeUrl, TIMEOUT);
RMNodeEvent event = schedulerHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, nodeUrl, TIMEOUT);
assertEquals(NodeState.FREE, event.getNodeState());
return testNode;
}
private TaskFlowJob createJob(String communicationObjectUrl) throws Exception {
TaskFlowJob job = new TaskFlowJob();
job.setName(this.getClass().getSimpleName());
job.setOnTaskError(OnTaskError.CANCEL_JOB);
job.setMaxNumberOfExecution(1);
JavaTask javaTask = new JavaTask();
javaTask.setExecutableClassName(TestJavaTask.class.getName());
javaTask.setMaxNumberOfExecution(1);
javaTask.setOnTaskError(OnTaskError.CANCEL_JOB);
javaTask.setName("Test task");
javaTask.addArgument("fileLockPath", communicationObjectUrl);
job.addTask(javaTask);
return job;
}
private void checkJobResult(Scheduler scheduler, JobId jobId) throws Exception {
JobResult jobResult = scheduler.getJobResult(jobId);
assertEquals("Unexpected number of task results", 1, jobResult.getAllResults().size());
for (TaskResult taskResult : jobResult.getAllResults().values()) {
log("Task " + taskResult.getTaskId());
assertNull("Unexpected task result exception", taskResult.getException());
String output = taskResult.getOutput().getAllLogs(false);
log("Task output:");
log(output);
assertTrue("Unxepected output", output.contains("OK"));
}
}
}