/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.nephele.jobmanager.scheduler;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.stratosphere.nephele.execution.ExecutionState;
import eu.stratosphere.nephele.executiongraph.ExecutionEdge;
import eu.stratosphere.nephele.executiongraph.ExecutionGate;
import eu.stratosphere.nephele.executiongraph.ExecutionGraph;
import eu.stratosphere.nephele.executiongraph.ExecutionGraphIterator;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertex;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertexIterator;
import eu.stratosphere.nephele.executiongraph.ExecutionPipeline;
import eu.stratosphere.nephele.executiongraph.ExecutionStage;
import eu.stratosphere.nephele.executiongraph.ExecutionVertex;
import eu.stratosphere.nephele.executiongraph.ExecutionVertexID;
import eu.stratosphere.nephele.executiongraph.InternalJobStatus;
import eu.stratosphere.nephele.instance.AbstractInstance;
import eu.stratosphere.nephele.instance.AllocatedResource;
import eu.stratosphere.nephele.instance.AllocationID;
import eu.stratosphere.nephele.instance.DummyInstance;
import eu.stratosphere.nephele.instance.InstanceException;
import eu.stratosphere.nephele.instance.InstanceListener;
import eu.stratosphere.nephele.instance.InstanceManager;
import eu.stratosphere.nephele.instance.InstanceRequestMap;
import eu.stratosphere.nephele.instance.InstanceType;
import eu.stratosphere.nephele.jobgraph.JobID;
import eu.stratosphere.nephele.jobmanager.DeploymentManager;
import eu.stratosphere.util.StringUtils;
/**
 * This abstract scheduler must be extended by scheduler implementations for Nephele. The abstract class defines the
 * fundamental methods for scheduling and removing jobs. While Nephele's
 * {@link eu.stratosphere.nephele.jobmanager.JobManager} is responsible for requesting the required instances for the
 * job at the {@link eu.stratosphere.nephele.instance.InstanceManager}, the scheduler is in charge of assigning the
 * individual tasks to the instances.
 *
 */
public abstract class AbstractScheduler implements InstanceListener {

	/**
	 * The LOG object to report events within the scheduler.
	 */
	protected static final Log LOG = LogFactory.getLog(AbstractScheduler.class);

	/**
	 * The instance manager assigned to this scheduler.
	 */
	private final InstanceManager instanceManager;

	/**
	 * The deployment manager assigned to this scheduler.
	 */
	private final DeploymentManager deploymentManager;

	/**
	 * Stores the vertices to be restarted once they have switched to the <code>CANCELED</code> state.
	 */
	private final Map<ExecutionVertexID, ExecutionVertex> verticesToBeRestarted = new ConcurrentHashMap<ExecutionVertexID, ExecutionVertex>();

	/**
	 * Constructs a new abstract scheduler and registers it as the instance listener of the given instance manager.
	 *
	 * @param deploymentManager
	 *        the deployment manager assigned to this scheduler
	 * @param instanceManager
	 *        the instance manager to be used with this scheduler
	 */
	protected AbstractScheduler(final DeploymentManager deploymentManager, final InstanceManager instanceManager) {
		this.deploymentManager = deploymentManager;
		this.instanceManager = instanceManager;
		this.instanceManager.setInstanceListener(this);
	}

	/**
	 * Adds a job represented by an {@link ExecutionGraph} object to the scheduler. The job is then executed according
	 * to the strategies of the concrete scheduler implementation.
	 * <p>
	 * NOTE(review): the method name contains a typo ("schedul" instead of "schedule"); it is kept unchanged
	 * because renaming it would break all existing scheduler implementations and callers.
	 *
	 * @param executionGraph
	 *        the job to be added to the scheduler
	 * @throws SchedulingException
	 *         thrown if an error occurs and the scheduler does not accept the new job
	 */
	public abstract void schedulJob(ExecutionGraph executionGraph) throws SchedulingException;

	/**
	 * Returns the execution graph which is associated with the given job ID.
	 *
	 * @param jobID
	 *        the job ID to search the execution graph for
	 * @return the execution graph which belongs to the given job ID or <code>null</code> if no such execution graph
	 *         exists
	 */
	public abstract ExecutionGraph getExecutionGraphByID(JobID jobID);

	/**
	 * Returns the {@link InstanceManager} object which is used by the current scheduler.
	 *
	 * @return the {@link InstanceManager} object which is used by the current scheduler
	 */
	public InstanceManager getInstanceManager() {
		return this.instanceManager;
	}

	/**
	 * Shuts the scheduler down. After shut down no jobs can be added to the scheduler.
	 */
	public abstract void shutdown();

	/**
	 * Collects the instances required to run the job from the given {@link ExecutionStage} and requests them at the
	 * loaded instance manager. All vertices of the current stage that are still in the <code>CREATED</code> state
	 * are switched to <code>SCHEDULED</code> once the request has been issued.
	 *
	 * @param executionStage
	 *        the execution stage to collect the required instances from
	 * @throws InstanceException
	 *         thrown if the given execution graph is already processing its final stage
	 */
	protected void requestInstances(final ExecutionStage executionStage) throws InstanceException {

		final ExecutionGraph executionGraph = executionStage.getExecutionGraph();
		final InstanceRequestMap instanceRequestMap = new InstanceRequestMap();

		synchronized (executionStage) {

			executionStage.collectRequiredInstanceTypes(instanceRequestMap, ExecutionState.CREATED);

			// Only iterate over the request map for logging if INFO is actually enabled.
			if (LOG.isInfoEnabled()) {
				LOG.info("Requesting the following instances for job " + executionGraph.getJobID());
				final Iterator<Map.Entry<InstanceType, Integer>> it = instanceRequestMap.getMinimumIterator();
				while (it.hasNext()) {
					final Map.Entry<InstanceType, Integer> entry = it.next();
					LOG.info(" " + entry.getKey() + " [" + entry.getValue().intValue() + ", "
						+ instanceRequestMap.getMaximumNumberOfInstances(entry.getKey()) + "]");
				}
			}

			// Nothing to request, hence no vertex state needs to be updated.
			if (instanceRequestMap.isEmpty()) {
				return;
			}

			this.instanceManager.requestInstance(executionGraph.getJobID(), executionGraph.getJobConfiguration(),
				instanceRequestMap, null);

			// Switch vertex state from CREATED to SCHEDULED for the current execution stage.
			final ExecutionGraphIterator it2 = new ExecutionGraphIterator(executionGraph, executionGraph
				.getIndexOfCurrentExecutionStage(), true, true);
			while (it2.hasNext()) {
				it2.next().compareAndUpdateExecutionState(ExecutionState.CREATED, ExecutionState.SCHEDULED);
			}
		}
	}

	/**
	 * Recursively collects the vertices which are ready for deployment, starting from the given vertex and following
	 * its in-memory output channels. A vertex is ready if its state can be switched from <code>ASSIGNED</code> to
	 * <code>READY</code>. Ready vertices are grouped in the given map by the instance they shall run on.
	 *
	 * @param vertex
	 *        the vertex to start the traversal from
	 * @param verticesToBeDeployed
	 *        the map collecting the vertices to be deployed, keyed by their assigned instance
	 * @param alreadyVisited
	 *        the set of vertices which have already been visited during the traversal
	 */
	void findVerticesToBeDeployed(final ExecutionVertex vertex,
			final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed,
			final Set<ExecutionVertex> alreadyVisited) {

		// Abort the traversal if this vertex has been seen before.
		if (!alreadyVisited.add(vertex)) {
			return;
		}

		if (vertex.compareAndUpdateExecutionState(ExecutionState.ASSIGNED, ExecutionState.READY)) {
			final AbstractInstance instance = vertex.getAllocatedResource().getInstance();

			// A vertex that is about to be deployed must have a real instance assigned.
			if (instance instanceof DummyInstance) {
				LOG.error("Inconsistency: Vertex " + vertex + " is about to be deployed on a DummyInstance");
			}

			List<ExecutionVertex> verticesForInstance = verticesToBeDeployed.get(instance);
			if (verticesForInstance == null) {
				verticesForInstance = new ArrayList<ExecutionVertex>();
				verticesToBeDeployed.put(instance, verticesForInstance);
			}

			verticesForInstance.add(vertex);
		}

		final int numberOfOutputGates = vertex.getNumberOfOutputGates();
		for (int i = 0; i < numberOfOutputGates; ++i) {

			final ExecutionGate outputGate = vertex.getOutputGate(i);
			boolean deployTarget;

			switch (outputGate.getChannelType()) {
			case NETWORK:
				// Network receivers can be deployed independently of their senders.
				deployTarget = false;
				break;
			case IN_MEMORY:
				// In-memory channels require the receiver to be deployed together with the sender.
				deployTarget = true;
				break;
			default:
				throw new IllegalStateException("Unknown channel type");
			}

			if (deployTarget) {
				final int numberOfOutputChannels = outputGate.getNumberOfEdges();
				for (int j = 0; j < numberOfOutputChannels; ++j) {
					final ExecutionEdge outputChannel = outputGate.getEdge(j);
					final ExecutionVertex connectedVertex = outputChannel.getInputGate().getVertex();
					findVerticesToBeDeployed(connectedVertex, verticesToBeDeployed, alreadyVisited);
				}
			}
		}
	}

	/**
	 * Hands the collected vertices over to the deployment manager, issuing one deployment call per instance.
	 *
	 * @param jobID
	 *        the ID of the job the given vertices belong to
	 * @param verticesToBeDeployed
	 *        the vertices to be deployed, grouped by the instance they shall run on
	 */
	private void deployVertices(final JobID jobID,
			final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed) {

		for (final Map.Entry<AbstractInstance, List<ExecutionVertex>> entry : verticesToBeDeployed.entrySet()) {
			this.deploymentManager.deploy(jobID, entry.getKey(), entry.getValue());
		}
	}

	/**
	 * Collects all execution vertices with the state ASSIGNED starting from the given start vertex and
	 * deploys them on the assigned {@link AllocatedResource} objects.
	 *
	 * @param startVertex
	 *        the execution vertex to start the deployment from
	 */
	public void deployAssignedVertices(final ExecutionVertex startVertex) {

		final JobID jobID = startVertex.getExecutionGraph().getJobID();

		final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed = new HashMap<AbstractInstance, List<ExecutionVertex>>();
		final Set<ExecutionVertex> alreadyVisited = new HashSet<ExecutionVertex>();

		findVerticesToBeDeployed(startVertex, verticesToBeDeployed, alreadyVisited);

		if (!verticesToBeDeployed.isEmpty()) {
			deployVertices(jobID, verticesToBeDeployed);
		}
	}

	/**
	 * Collects all execution vertices with the state ASSIGNED from the given pipeline and deploys them on the assigned
	 * {@link AllocatedResource} objects.
	 *
	 * @param pipeline
	 *        the execution pipeline to be deployed
	 */
	public void deployAssignedPipeline(final ExecutionPipeline pipeline) {

		// BUGFIX: previously a null job ID was handed to the deployment manager. Derive the ID from the
		// pipeline's vertices instead, analogous to deployAssignedVertices(Collection).
		JobID jobID = null;

		final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed = new HashMap<AbstractInstance, List<ExecutionVertex>>();
		final Set<ExecutionVertex> alreadyVisited = new HashSet<ExecutionVertex>();

		final Iterator<ExecutionVertex> it = pipeline.iterator();
		while (it.hasNext()) {
			final ExecutionVertex vertex = it.next();
			if (jobID == null) {
				jobID = vertex.getExecutionGraph().getJobID();
			}
			findVerticesToBeDeployed(vertex, verticesToBeDeployed, alreadyVisited);
		}

		if (!verticesToBeDeployed.isEmpty()) {
			deployVertices(jobID, verticesToBeDeployed);
		}
	}

	/**
	 * Collects all execution vertices with the state ASSIGNED starting from the given collection of start vertices and
	 * deploys them on the assigned {@link AllocatedResource} objects.
	 *
	 * @param startVertices
	 *        the collection of execution vertices to start the deployment from
	 */
	public void deployAssignedVertices(final Collection<ExecutionVertex> startVertices) {

		// All vertices belong to the same job, so the ID can be taken from the first one.
		JobID jobID = null;

		final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed = new HashMap<AbstractInstance, List<ExecutionVertex>>();
		final Set<ExecutionVertex> alreadyVisited = new HashSet<ExecutionVertex>();

		for (final ExecutionVertex startVertex : startVertices) {
			if (jobID == null) {
				jobID = startVertex.getExecutionGraph().getJobID();
			}
			findVerticesToBeDeployed(startVertex, verticesToBeDeployed, alreadyVisited);
		}

		if (!verticesToBeDeployed.isEmpty()) {
			deployVertices(jobID, verticesToBeDeployed);
		}
	}

	/**
	 * Collects all execution vertices with the state ASSIGNED starting from the input vertices of the current execution
	 * stage and deploys them on the assigned {@link AllocatedResource} objects.
	 *
	 * @param executionGraph
	 *        the execution graph to collect the vertices from
	 */
	public void deployAssignedInputVertices(final ExecutionGraph executionGraph) {

		final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed = new HashMap<AbstractInstance, List<ExecutionVertex>>();
		final ExecutionStage executionStage = executionGraph.getCurrentExecutionStage();
		final Set<ExecutionVertex> alreadyVisited = new HashSet<ExecutionVertex>();

		for (int i = 0; i < executionStage.getNumberOfStageMembers(); ++i) {

			final ExecutionGroupVertex startVertex = executionStage.getStageMember(i);
			// Only start the traversal from input vertices of the stage.
			if (!startVertex.isInputVertex()) {
				continue;
			}

			for (int j = 0; j < startVertex.getCurrentNumberOfGroupMembers(); ++j) {
				final ExecutionVertex vertex = startVertex.getGroupMember(j);
				findVerticesToBeDeployed(vertex, verticesToBeDeployed, alreadyVisited);
			}
		}

		if (!verticesToBeDeployed.isEmpty()) {
			deployVertices(executionGraph.getJobID(), verticesToBeDeployed);
		}
	}

	/**
	 * Called by the instance manager when the requested resources have been allocated. The method matches each
	 * allocated resource against a vertex of the current stage that is still waiting for an instance of the
	 * same type, reassigns those vertices and triggers deployment of the stage's input vertices. Resources
	 * which are not needed (job gone or no matching vertex) are released immediately.
	 *
	 * @param jobID
	 *        the ID of the job the resources have been allocated for
	 * @param allocatedResources
	 *        the list of resources which have been allocated
	 */
	@Override
	public void resourcesAllocated(final JobID jobID, final List<AllocatedResource> allocatedResources) {

		if (allocatedResources == null) {
			LOG.error("Resource to lock is null!");
			return;
		}

		for (final AllocatedResource allocatedResource : allocatedResources) {
			if (allocatedResource.getInstance() instanceof DummyInstance) {
				LOG.debug("Available instance is of type DummyInstance!");
				return;
			}
		}

		final ExecutionGraph eg = getExecutionGraphByID(jobID);

		if (eg == null) {
			/*
			 * The job may have been canceled in the meantime, in this case
			 * we release the instances immediately.
			 */
			try {
				for (final AllocatedResource allocatedResource : allocatedResources) {
					getInstanceManager().releaseAllocatedResource(jobID, null, allocatedResource);
				}
			} catch (InstanceException e) {
				LOG.error(StringUtils.stringifyException(e));
			}
			return;
		}

		final Runnable command = new Runnable() {

			/**
			 * {@inheritDoc}
			 */
			@Override
			public void run() {

				final ExecutionStage stage = eg.getCurrentExecutionStage();

				synchronized (stage) {

					for (final AllocatedResource allocatedResource : allocatedResources) {

						AllocatedResource resourceToBeReplaced = null;
						// Important: only look for instances to be replaced in the current stage
						final Iterator<ExecutionGroupVertex> groupIterator = new ExecutionGroupVertexIterator(eg, true,
							stage.getStageNumber());
						while (groupIterator.hasNext()) {

							final ExecutionGroupVertex groupVertex = groupIterator.next();
							for (int i = 0; i < groupVertex.getCurrentNumberOfGroupMembers(); ++i) {

								final ExecutionVertex vertex = groupVertex.getGroupMember(i);

								if (vertex.getExecutionState() == ExecutionState.SCHEDULED
									&& vertex.getAllocatedResource() != null) {
									// In local mode, we do not consider any topology, only the instance type
									if (vertex.getAllocatedResource().getInstanceType().equals(
										allocatedResource.getInstanceType())) {
										resourceToBeReplaced = vertex.getAllocatedResource();
										break;
									}
								}
							}

							if (resourceToBeReplaced != null) {
								break;
							}
						}

						// For some reason, we don't need this instance
						if (resourceToBeReplaced == null) {
							LOG.error("Instance " + allocatedResource.getInstance() + " is not required for job "
								+ eg.getJobID());
							try {
								getInstanceManager().releaseAllocatedResource(jobID, eg.getJobConfiguration(),
									allocatedResource);
							} catch (InstanceException e) {
								LOG.error(StringUtils.stringifyException(e));
							}
							return;
						}

						// Replace the selected (dummy) resource by the newly allocated one.
						final Iterator<ExecutionVertex> it = resourceToBeReplaced.assignedVertices();
						while (it.hasNext()) {
							final ExecutionVertex vertex = it.next();
							vertex.setAllocatedResource(allocatedResource);
							vertex.updateExecutionState(ExecutionState.ASSIGNED);
						}
					}
				}

				// Deploy the assigned vertices
				deployAssignedInputVertices(eg);
			}

		};

		eg.executeCommand(command);
	}

	/**
	 * Checks if the given {@link AllocatedResource} is still required for the
	 * execution of the given execution graph. If the resource is no longer
	 * assigned to a vertex that is either currently running or about to run
	 * the given resource is returned to the instance manager for deallocation.
	 *
	 * @param executionGraph
	 *        the execution graph the provided resource has been used for so far
	 * @param allocatedResource
	 *        the allocated resource to check the assignment for
	 */
	public void checkAndReleaseAllocatedResource(final ExecutionGraph executionGraph,
			final AllocatedResource allocatedResource) {

		if (allocatedResource == null) {
			LOG.error("Resource to lock is null!");
			return;
		}

		if (allocatedResource.getInstance() instanceof DummyInstance) {
			LOG.debug("Available instance is of type DummyInstance!");
			return;
		}

		// The resource may be released only if all assigned vertices are in a terminal or not-yet-started state.
		boolean resourceCanBeReleased = true;
		final Iterator<ExecutionVertex> it = allocatedResource.assignedVertices();
		while (it.hasNext()) {
			final ExecutionVertex vertex = it.next();
			final ExecutionState state = vertex.getExecutionState();
			if (state != ExecutionState.CREATED && state != ExecutionState.FINISHED
				&& state != ExecutionState.FAILED && state != ExecutionState.CANCELED) {
				resourceCanBeReleased = false;
				break;
			}
		}

		if (resourceCanBeReleased) {

			LOG.info("Releasing instance " + allocatedResource.getInstance());
			try {
				getInstanceManager().releaseAllocatedResource(executionGraph.getJobID(), executionGraph
					.getJobConfiguration(), allocatedResource);
			} catch (InstanceException e) {
				LOG.error(StringUtils.stringifyException(e));
			}
		}
	}

	/**
	 * Returns the {@link DeploymentManager} assigned to this scheduler.
	 *
	 * @return the deployment manager assigned to this scheduler
	 */
	DeploymentManager getDeploymentManager() {
		return this.deploymentManager;
	}

	/**
	 * Marks the output vertices of the previous execution stage as ASSIGNED again and redeploys them so that
	 * their checkpointed data can be replayed for the current stage.
	 *
	 * @param executionGraph
	 *        the execution graph whose previous stage shall be replayed
	 */
	protected void replayCheckpointsFromPreviousStage(final ExecutionGraph executionGraph) {

		final int currentStageIndex = executionGraph.getIndexOfCurrentExecutionStage();
		final ExecutionStage previousStage = executionGraph.getStage(currentStageIndex - 1);

		final List<ExecutionVertex> verticesToBeReplayed = new ArrayList<ExecutionVertex>();

		for (int i = 0; i < previousStage.getNumberOfOutputExecutionVertices(); ++i) {

			final ExecutionVertex vertex = previousStage.getOutputExecutionVertex(i);
			vertex.updateExecutionState(ExecutionState.ASSIGNED);
			verticesToBeReplayed.add(vertex);
		}

		deployAssignedVertices(verticesToBeReplayed);
	}

	/**
	 * Returns a map of vertices to be restarted once they have switched to their <code>CANCELED</code> state.
	 *
	 * @return the map of vertices to be restarted
	 */
	Map<ExecutionVertexID, ExecutionVertex> getVerticesToBeRestarted() {

		return this.verticesToBeRestarted;
	}

	/**
	 * Called by the instance manager when previously allocated resources have died. All vertices assigned to a
	 * dead resource are moved back to a {@link DummyInstance} of the same type, vertices that were already
	 * running (or about to) are marked as FAILED, and, unless the whole job is failing, replacement instances
	 * are requested for the current stage.
	 *
	 * @param jobID
	 *        the ID of the job the dead resources belong to
	 * @param allocatedResources
	 *        the list of resources which have died
	 */
	@Override
	public void allocatedResourcesDied(final JobID jobID, final List<AllocatedResource> allocatedResources) {

		final ExecutionGraph eg = getExecutionGraphByID(jobID);

		if (eg == null) {
			LOG.error("Cannot find execution graph for job with ID " + jobID);
			return;
		}

		final Runnable command = new Runnable() {

			/**
			 * {@inheritDoc}
			 */
			@Override
			public void run() {

				synchronized (eg) {

					for (final AllocatedResource allocatedResource : allocatedResources) {

						LOG.info("Resource " + allocatedResource.getInstance().getName() + " for Job " + jobID
							+ " died.");

						final ExecutionGraph executionGraph = getExecutionGraphByID(jobID);

						if (executionGraph == null) {
							LOG.error("Cannot find execution graph for job " + jobID);
							return;
						}

						Iterator<ExecutionVertex> vertexIter = allocatedResource.assignedVertices();

						// Assign vertices back to a dummy resource.
						final DummyInstance dummyInstance = DummyInstance.createDummyInstance(allocatedResource
							.getInstance()
							.getType());
						final AllocatedResource dummyResource = new AllocatedResource(dummyInstance,
							allocatedResource.getInstanceType(), new AllocationID());

						while (vertexIter.hasNext()) {
							final ExecutionVertex vertex = vertexIter.next();
							vertex.setAllocatedResource(dummyResource);
						}

						final String failureMessage = allocatedResource.getInstance().getName() + " died";

						vertexIter = allocatedResource.assignedVertices();

						while (vertexIter.hasNext()) {
							final ExecutionVertex vertex = vertexIter.next();
							final ExecutionState state = vertex.getExecutionState();

							switch (state) {
							case ASSIGNED:
							case READY:
							case STARTING:
							case RUNNING:
							case FINISHING:
								// The vertex was running or about to run on the dead instance, so it failed.
								vertex.updateExecutionState(ExecutionState.FAILED, failureMessage);
								break;
							default:
								// All other states are unaffected by the instance death.
							}
						}

						// TODO: Request replacement instances for the affected stage only and cancel the
						// entire job if the request fails (see the stage-level request below).
					}
				}

				final InternalJobStatus js = eg.getJobStatus();
				if (js != InternalJobStatus.FAILING && js != InternalJobStatus.FAILED) {

					// TODO: Fix this
					// deployAssignedVertices(eg);
					final ExecutionStage stage = eg.getCurrentExecutionStage();

					try {
						requestInstances(stage);
					} catch (InstanceException e) {
						// TODO: Cancel the entire job in this case
						LOG.error(StringUtils.stringifyException(e));
					}
				}
			}
		};

		eg.executeCommand(command);
	}
}