AbstractScheduler.java example

Explorer
stratosphere-master
/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.nephele.jobmanager.scheduler;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.stratosphere.nephele.execution.ExecutionState;
import eu.stratosphere.nephele.executiongraph.ExecutionEdge;
import eu.stratosphere.nephele.executiongraph.ExecutionGate;
import eu.stratosphere.nephele.executiongraph.ExecutionGraph;
import eu.stratosphere.nephele.executiongraph.ExecutionGraphIterator;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertex;
import eu.stratosphere.nephele.executiongraph.ExecutionGroupVertexIterator;
import eu.stratosphere.nephele.executiongraph.ExecutionPipeline;
import eu.stratosphere.nephele.executiongraph.ExecutionStage;
import eu.stratosphere.nephele.executiongraph.ExecutionVertex;
import eu.stratosphere.nephele.executiongraph.ExecutionVertexID;
import eu.stratosphere.nephele.executiongraph.InternalJobStatus;
import eu.stratosphere.nephele.instance.AbstractInstance;
import eu.stratosphere.nephele.instance.AllocatedResource;
import eu.stratosphere.nephele.instance.AllocationID;
import eu.stratosphere.nephele.instance.DummyInstance;
import eu.stratosphere.nephele.instance.InstanceException;
import eu.stratosphere.nephele.instance.InstanceListener;
import eu.stratosphere.nephele.instance.InstanceManager;
import eu.stratosphere.nephele.instance.InstanceRequestMap;
import eu.stratosphere.nephele.instance.InstanceType;
import eu.stratosphere.nephele.jobgraph.JobID;
import eu.stratosphere.nephele.jobmanager.DeploymentManager;
import eu.stratosphere.util.StringUtils;

/**
 * This abstract scheduler must be extended by all scheduler implementations for Nephele. The abstract class defines the
 * fundamental methods for scheduling and removing jobs. While Nephele's
 * {@link eu.stratosphere.nephele.jobmanager.JobManager} is responsible for requesting the required instances for the
 * job at the {@link eu.stratosphere.nephele.instance.InstanceManager}, the scheduler is in charge of assigning the
 * individual tasks to the instances.
 * 
 */
public abstract class AbstractScheduler implements InstanceListener {

	/**
	 * The LOG object to report events within the scheduler. It is <code>protected</code> and therefore also available
	 * to the concrete scheduler implementations.
	 */
	protected static final Log LOG = LogFactory.getLog(AbstractScheduler.class);

	/**
	 * The instance manager assigned to this scheduler. The scheduler requests instances from it and returns allocated
	 * resources to it once they are no longer needed.
	 */
	private final InstanceManager instanceManager;

	/**
	 * The deployment manager assigned to this scheduler. It receives the vertices to be deployed, grouped by the
	 * instance they have been assigned to.
	 */
	private final DeploymentManager deploymentManager;

	/**
	 * Stores the vertices to be restarted once they have switched to the <code>CANCELED</code> state, indexed by their
	 * vertex ID. This class only hands the map out via {@link #getVerticesToBeRestarted()}; it does not modify it
	 * itself.
	 */
	private final Map<ExecutionVertexID, ExecutionVertex> verticesToBeRestarted = new ConcurrentHashMap<ExecutionVertexID, ExecutionVertex>();

	/**
	 * Creates the scheduler base and connects it to its two collaborators.
	 * <p>
	 * Besides storing both managers, the newly created scheduler registers itself as the {@link InstanceListener} of
	 * the given instance manager.
	 * 
	 * @param deploymentManager
	 *        the deployment manager assigned to this scheduler
	 * @param instanceManager
	 *        the instance manager to be used with this scheduler
	 */
	protected AbstractScheduler(final DeploymentManager deploymentManager, final InstanceManager instanceManager) {

		this.instanceManager = instanceManager;
		this.deploymentManager = deploymentManager;

		// Subscribe to the notifications of the instance manager
		instanceManager.setInstanceListener(this);
	}

	/**
	 * Adds a job represented by an {@link ExecutionGraph} object to the scheduler. The job is then executed according
	 * to the strategies of the concrete scheduler implementation.
	 * <p>
	 * Note: the method name is spelled <code>schedulJob</code> (without the second "e"); the spelling is part of the
	 * public interface and must be matched by implementations and callers.
	 * 
	 * @param executionGraph
	 *        the job to be added to the scheduler
	 * @throws SchedulingException
	 *         thrown if an error occurs and the scheduler does not accept the new job
	 */
	public abstract void schedulJob(ExecutionGraph executionGraph) throws SchedulingException;

	/**
	 * Returns the execution graph which is associated with the given job ID.
	 * <p>
	 * The instance listener callbacks of this class use this method to map a job ID back to its execution graph and
	 * treat a <code>null</code> result as "job is no longer known to the scheduler".
	 * 
	 * @param jobID
	 *        the job ID to search the execution graph for
	 * @return the execution graph which belongs to the given job ID or <code>null</code> if no such execution graph
	 *         exists
	 */
	public abstract ExecutionGraph getExecutionGraphByID(JobID jobID);

	/**
	 * Provides access to the {@link InstanceManager} this scheduler has been constructed with.
	 * 
	 * @return the {@link InstanceManager} object which is used by the current scheduler
	 */
	public InstanceManager getInstanceManager() {

		return instanceManager;
	}

	// void removeJob(JobID jobID);

	/**
	 * Shuts the scheduler down. After shut down no jobs can be added to the scheduler.
	 * <p>
	 * What happens to jobs that are already known to the scheduler is defined by the concrete implementation.
	 */
	public abstract void shutdown();

	/**
	 * Determines the instances the given {@link ExecutionStage} requires and requests them at the instance manager.
	 * <p>
	 * The whole operation runs while holding the monitor of the stage. The required instance types are collected for
	 * the execution state <code>CREATED</code> and written to the log. If nothing is required, the method returns
	 * without contacting the instance manager. Otherwise the request is sent and the vertices visited by an
	 * {@link ExecutionGraphIterator} created for the index of the graph's current execution stage are switched from
	 * <code>CREATED</code> to <code>SCHEDULED</code>.
	 * 
	 * @param executionStage
	 *        the execution stage to collect the required instances from
	 * @throws InstanceException
	 *         thrown if an error occurs while the instances are requested
	 */
	protected void requestInstances(final ExecutionStage executionStage) throws InstanceException {

		final ExecutionGraph graph = executionStage.getExecutionGraph();
		final InstanceRequestMap requests = new InstanceRequestMap();

		synchronized (executionStage) {

			executionStage.collectRequiredInstanceTypes(requests, ExecutionState.CREATED);

			// Report every requested instance type in the form "type [minimum, maximum]"
			final Iterator<Map.Entry<InstanceType, Integer>> minimumIt = requests.getMinimumIterator();
			LOG.info("Requesting the following instances for job " + graph.getJobID());
			while (minimumIt.hasNext()) {
				final Map.Entry<InstanceType, Integer> request = minimumIt.next();
				final InstanceType type = request.getKey();
				final int minimum = request.getValue().intValue();
				LOG.info(" " + type + " [" + minimum + ", " + requests.getMaximumNumberOfInstances(type) + "]");
			}

			if (!requests.isEmpty()) {

				this.instanceManager.requestInstance(graph.getJobID(), graph.getJobConfiguration(), requests, null);

				// The request is out, so the affected vertices move on from CREATED to SCHEDULED
				final ExecutionGraphIterator vertexIt = new ExecutionGraphIterator(graph,
					graph.getIndexOfCurrentExecutionStage(), true, true);
				while (vertexIt.hasNext()) {
					final ExecutionVertex vertex = vertexIt.next();
					vertex.compareAndUpdateExecutionState(ExecutionState.CREATED, ExecutionState.SCHEDULED);
				}
			}
		}
	}

	/**
	 * Recursively collects the vertices which can be deployed, starting at the given vertex.
	 * <p>
	 * A vertex is added to the result map, grouped by the instance of its allocated resource, if its state could be
	 * switched from <code>ASSIGNED</code> to <code>READY</code>. Independent of that outcome, the search continues
	 * with all vertices connected through output gates whose channel type is <code>IN_MEMORY</code>; output gates with
	 * channel type <code>NETWORK</code> are not followed.
	 * 
	 * @param vertex
	 *        the vertex to examine
	 * @param verticesToBeDeployed
	 *        the result map, filled with the vertices to be deployed per instance
	 * @param alreadyVisited
	 *        the set of vertices which have already been examined, used to visit every vertex only once
	 * @throws IllegalStateException
	 *         thrown if an output gate reports a channel type other than <code>NETWORK</code> or
	 *         <code>IN_MEMORY</code>
	 */
	void findVerticesToBeDeployed(final ExecutionVertex vertex,
			final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed,
			final Set<ExecutionVertex> alreadyVisited) {

		// add() reports false for a vertex that has been examined before
		final boolean firstVisit = alreadyVisited.add(vertex);
		if (!firstVisit) {
			return;
		}

		if (vertex.compareAndUpdateExecutionState(ExecutionState.ASSIGNED, ExecutionState.READY)) {

			final AbstractInstance targetInstance = vertex.getAllocatedResource().getInstance();
			if (targetInstance instanceof DummyInstance) {
				LOG.error("Inconsistency: Vertex " + vertex + " is about to be deployed on a DummyInstance");
			}

			List<ExecutionVertex> bucket = verticesToBeDeployed.get(targetInstance);
			if (bucket == null) {
				bucket = new ArrayList<ExecutionVertex>();
				verticesToBeDeployed.put(targetInstance, bucket);
			}
			bucket.add(vertex);
		}

		final int gateCount = vertex.getNumberOfOutputGates();
		for (int gateIndex = 0; gateIndex < gateCount; gateIndex++) {

			final ExecutionGate gate = vertex.getOutputGate(gateIndex);

			switch (gate.getChannelType()) {
			case NETWORK:
				// Receivers behind a network channel are not collected here, go on with the next gate
				continue;
			case IN_MEMORY:
				// Receivers behind an in-memory channel are collected together with this vertex
				break;
			default:
				throw new IllegalStateException("Unknown channel type");
			}

			final int edgeCount = gate.getNumberOfEdges();
			for (int edgeIndex = 0; edgeIndex < edgeCount; edgeIndex++) {
				final ExecutionEdge edge = gate.getEdge(edgeIndex);
				findVerticesToBeDeployed(edge.getInputGate().getVertex(), verticesToBeDeployed, alreadyVisited);
			}
		}
	}

	/**
	 * Deploys the vertices in state <code>ASSIGNED</code> which can be reached from the given start vertex.
	 * <p>
	 * The vertices are collected by {@link #findVerticesToBeDeployed(ExecutionVertex, Map, Set)} and handed to the
	 * deployment manager with one call per instance.
	 * 
	 * @param startVertex
	 *        the execution vertex to start the deployment from
	 */
	public void deployAssignedVertices(final ExecutionVertex startVertex) {

		final JobID jobID = startVertex.getExecutionGraph().getJobID();

		final Set<ExecutionVertex> visited = new HashSet<ExecutionVertex>();
		final Map<AbstractInstance, List<ExecutionVertex>> verticesByInstance = new HashMap<AbstractInstance, List<ExecutionVertex>>();

		findVerticesToBeDeployed(startVertex, verticesByInstance, visited);

		// An empty map simply results in no deployment call at all
		for (final Map.Entry<AbstractInstance, List<ExecutionVertex>> deployment : verticesByInstance.entrySet()) {
			this.deploymentManager.deploy(jobID, deployment.getKey(), deployment.getValue());
		}
	}

	/**
	 * Collects all execution vertices with the state ASSIGNED from the given pipeline and deploys them on the assigned
	 * {@link AllocatedResource} objects.
	 * <p>
	 * The job ID passed to the deployment manager is taken from the execution graph of the pipeline's vertices. An
	 * empty pipeline yields no vertices and therefore no deployment call.
	 * 
	 * @param pipeline
	 *        the execution pipeline to be deployed
	 */
	public void deployAssignedPipeline(final ExecutionPipeline pipeline) {

		JobID jobID = null;

		final Map<AbstractInstance, List<ExecutionVertex>> verticesToBeDeployed = new HashMap<AbstractInstance, List<ExecutionVertex>>();
		final Set<ExecutionVertex> alreadyVisited = new HashSet<ExecutionVertex>();

		final Iterator<ExecutionVertex> it = pipeline.iterator();
		while (it.hasNext()) {

			final ExecutionVertex vertex = it.next();

			// Determine the job ID from the vertices instead of deploying with a null job ID
			if (jobID == null) {
				jobID = vertex.getExecutionGraph().getJobID();
			}

			findVerticesToBeDeployed(vertex, verticesToBeDeployed, alreadyVisited);
		}

		for (final Map.Entry<AbstractInstance, List<ExecutionVertex>> entry : verticesToBeDeployed.entrySet()) {
			this.deploymentManager.deploy(jobID, entry.getKey(), entry.getValue());
		}
	}

	/**
	 * Deploys the vertices in state <code>ASSIGNED</code> which can be reached from any of the given start vertices.
	 * <p>
	 * All start vertices share one "already visited" set, so a vertex reachable from several start vertices is
	 * collected only once. The job ID used for the deployment is read from the execution graph of the start vertices.
	 * 
	 * @param startVertices
	 *        the collection of execution vertices to start the deployment from
	 */
	public void deployAssignedVertices(final Collection<ExecutionVertex> startVertices) {

		final Set<ExecutionVertex> visited = new HashSet<ExecutionVertex>();
		final Map<AbstractInstance, List<ExecutionVertex>> verticesByInstance = new HashMap<AbstractInstance, List<ExecutionVertex>>();

		JobID jobID = null;

		final Iterator<ExecutionVertex> startIt = startVertices.iterator();
		while (startIt.hasNext()) {

			final ExecutionVertex startVertex = startIt.next();

			// Look up the job ID as long as it is not known yet
			if (jobID == null) {
				jobID = startVertex.getExecutionGraph().getJobID();
			}

			findVerticesToBeDeployed(startVertex, verticesByInstance, visited);
		}

		// An empty map simply results in no deployment call at all
		for (final Map.Entry<AbstractInstance, List<ExecutionVertex>> deployment : verticesByInstance.entrySet()) {
			this.deploymentManager.deploy(jobID, deployment.getKey(), deployment.getValue());
		}
	}

	/**
	 * Deploys the vertices in state <code>ASSIGNED</code> which can be reached from the input vertices of the current
	 * execution stage of the given execution graph.
	 * <p>
	 * Only group vertices of the current stage which report to be input vertices serve as starting points; each of
	 * their current group members is passed to {@link #findVerticesToBeDeployed(ExecutionVertex, Map, Set)}.
	 * 
	 * @param executionGraph
	 *        the execution graph to collect the vertices from
	 */
	public void deployAssignedInputVertices(final ExecutionGraph executionGraph) {

		final Map<AbstractInstance, List<ExecutionVertex>> verticesByInstance = new HashMap<AbstractInstance, List<ExecutionVertex>>();
		final ExecutionStage currentStage = executionGraph.getCurrentExecutionStage();

		final Set<ExecutionVertex> visited = new HashSet<ExecutionVertex>();

		for (int memberIndex = 0; memberIndex < currentStage.getNumberOfStageMembers(); memberIndex++) {

			final ExecutionGroupVertex groupVertex = currentStage.getStageMember(memberIndex);

			// Only the input vertices of the stage are used as starting points
			if (groupVertex.isInputVertex()) {
				for (int k = 0; k < groupVertex.getCurrentNumberOfGroupMembers(); k++) {
					findVerticesToBeDeployed(groupVertex.getGroupMember(k), verticesByInstance, visited);
				}
			}
		}

		// An empty map simply results in no deployment call at all
		for (final Map.Entry<AbstractInstance, List<ExecutionVertex>> deployment : verticesByInstance.entrySet()) {
			this.deploymentManager.deploy(executionGraph.getJobID(), deployment.getKey(), deployment.getValue());
		}
	}


	/**
	 * Handles the notification that resources have been allocated for the job with the given ID.
	 * <p>
	 * If the list contains a resource backed by a {@link DummyInstance}, the notification is ignored. If the job is no
	 * longer known to the scheduler, every resource is handed back to the instance manager. Otherwise a command is
	 * submitted to the execution graph which, for every new resource, looks for a resource of the same instance type
	 * that is currently held by a <code>SCHEDULED</code> vertex of the current execution stage, moves all vertices of
	 * that resource to the new resource and switches them to <code>ASSIGNED</code>. Resources for which no such
	 * counterpart exists are released. Finally, the assigned vertices are deployed.
	 * 
	 * @param jobID
	 *        the ID of the job the resources have been allocated for
	 * @param allocatedResources
	 *        the newly allocated resources
	 */
	@Override
	public void resourcesAllocated(final JobID jobID, final List<AllocatedResource> allocatedResources) {

		if (allocatedResources == null) {
			LOG.error("Resource to lock is null!");
			return;
		}

		for (final AllocatedResource allocatedResource : allocatedResources) {
			if (allocatedResource.getInstance() instanceof DummyInstance) {
				LOG.debug("Available instance is of type DummyInstance!");
				return;
			}
		}

		final ExecutionGraph eg = getExecutionGraphByID(jobID);

		if (eg == null) {
			/*
			 * The job may have been canceled in the meantime, in this case
			 * we release the instances immediately. Each release is guarded
			 * individually so one failure does not leak the remaining resources.
			 */
			for (final AllocatedResource allocatedResource : allocatedResources) {
				try {
					getInstanceManager().releaseAllocatedResource(jobID, null, allocatedResource);
				} catch (InstanceException e) {
					LOG.error(StringUtils.stringifyException(e));
				}
			}
			return;
		}

		final Runnable command = new Runnable() {

			/**
			 * {@inheritDoc}
			 */
			@Override
			public void run() {

				final ExecutionStage stage = eg.getCurrentExecutionStage();

				synchronized (stage) {

					for (final AllocatedResource allocatedResource : allocatedResources) {

						final AllocatedResource resourceToBeReplaced = findResourceToBeReplaced(eg, stage,
							allocatedResource);

						// For some reason, we don't need this instance
						if (resourceToBeReplaced == null) {
							LOG.error("Instance " + allocatedResource.getInstance() + " is not required for job "
								+ eg.getJobID());
							try {
								getInstanceManager().releaseAllocatedResource(jobID, eg.getJobConfiguration(),
									allocatedResource);
							} catch (InstanceException e) {
								LOG.error(StringUtils.stringifyException(e));
							}

							// Go on with the remaining resources; they may still be required
							continue;
						}

						// Replace the selected instance
						final Iterator<ExecutionVertex> it = resourceToBeReplaced.assignedVertices();
						while (it.hasNext()) {
							final ExecutionVertex vertex = it.next();
							vertex.setAllocatedResource(allocatedResource);
							vertex.updateExecutionState(ExecutionState.ASSIGNED);
						}
					}
				}

				// Deploy the assigned vertices
				deployAssignedInputVertices(eg);
			}
		};

		eg.executeCommand(command);
	}

	/**
	 * Searches the given stage for a resource which can be replaced by the given newly allocated resource.
	 * <p>
	 * A resource qualifies if it is held by a vertex in state <code>SCHEDULED</code> and has the same instance type as
	 * the new resource. Only the instance type is compared, no topology information is taken into account.
	 * 
	 * @param eg
	 *        the execution graph the stage belongs to
	 * @param stage
	 *        the stage to search; only vertices of this stage are considered
	 * @param allocatedResource
	 *        the newly allocated resource
	 * @return the resource to be replaced or <code>null</code> if no vertex of the stage waits for such a resource
	 */
	private static AllocatedResource findResourceToBeReplaced(final ExecutionGraph eg, final ExecutionStage stage,
			final AllocatedResource allocatedResource) {

		// Important: only look for instances to be replaced in the given stage
		final Iterator<ExecutionGroupVertex> groupIterator = new ExecutionGroupVertexIterator(eg, true,
			stage.getStageNumber());

		while (groupIterator.hasNext()) {

			final ExecutionGroupVertex groupVertex = groupIterator.next();
			for (int i = 0; i < groupVertex.getCurrentNumberOfGroupMembers(); ++i) {

				final ExecutionVertex vertex = groupVertex.getGroupMember(i);

				if (vertex.getExecutionState() == ExecutionState.SCHEDULED
					&& vertex.getAllocatedResource() != null) {
					// In local mode, we do not consider any topology, only the instance type
					if (vertex.getAllocatedResource().getInstanceType().equals(
						allocatedResource.getInstanceType())) {
						return vertex.getAllocatedResource();
					}
				}
			}
		}

		return null;
	}

	/**
	 * Returns the given {@link AllocatedResource} to the instance manager if none of its assigned vertices needs it
	 * any longer.
	 * <p>
	 * The resource is kept as soon as one assigned vertex is in a state other than <code>CREATED</code>,
	 * <code>FINISHED</code>, <code>FAILED</code> or <code>CANCELED</code>. A <code>null</code> resource and resources
	 * backed by a {@link DummyInstance} are only logged and otherwise ignored. A failure to release the resource is
	 * logged and not propagated to the caller.
	 * 
	 * @param executionGraph
	 *        the execution graph the provided resource has been used for so far
	 * @param allocatedResource
	 *        the allocated resource to check the assignment for
	 */
	public void checkAndReleaseAllocatedResource(final ExecutionGraph executionGraph,
			final AllocatedResource allocatedResource) {

		if (allocatedResource == null) {
			LOG.error("Resource to lock is null!");
			return;
		}

		if (allocatedResource.getInstance() instanceof DummyInstance) {
			LOG.debug("Available instance is of type DummyInstance!");
			return;
		}

		// A single vertex which is not in one of the states below keeps the resource allocated
		for (final Iterator<ExecutionVertex> vertexIt = allocatedResource.assignedVertices(); vertexIt.hasNext();) {

			final ExecutionState state = vertexIt.next().getExecutionState();
			final boolean resourceUnused = state == ExecutionState.CREATED || state == ExecutionState.FINISHED
				|| state == ExecutionState.FAILED || state == ExecutionState.CANCELED;

			if (!resourceUnused) {
				return;
			}
		}

		LOG.info("Releasing instance " + allocatedResource.getInstance());
		try {
			getInstanceManager().releaseAllocatedResource(executionGraph.getJobID(),
				executionGraph.getJobConfiguration(), allocatedResource);
		} catch (InstanceException e) {
			LOG.error(StringUtils.stringifyException(e));
		}
	}

	/**
	 * Provides access to the {@link DeploymentManager} this scheduler has been constructed with.
	 * 
	 * @return the deployment manager assigned to this scheduler
	 */
	DeploymentManager getDeploymentManager() {

		return deploymentManager;
	}

	/**
	 * Re-deploys the output vertices of the stage preceding the current execution stage of the given graph.
	 * <p>
	 * Every output vertex of the previous stage is set to <code>ASSIGNED</code> and then passed to
	 * {@link #deployAssignedVertices(Collection)}.
	 * <p>
	 * NOTE(review): the index of the previous stage is not checked; presumably callers only invoke this method when
	 * the graph is past its first stage - confirm.
	 * 
	 * @param executionGraph
	 *        the execution graph whose previous stage shall be replayed
	 */
	protected void replayCheckpointsFromPreviousStage(final ExecutionGraph executionGraph) {

		final int previousStageIndex = executionGraph.getIndexOfCurrentExecutionStage() - 1;
		final ExecutionStage previousStage = executionGraph.getStage(previousStageIndex);

		final List<ExecutionVertex> replayVertices = new ArrayList<ExecutionVertex>();

		int position = 0;
		while (position < previousStage.getNumberOfOutputExecutionVertices()) {

			final ExecutionVertex outputVertex = previousStage.getOutputExecutionVertex(position);
			outputVertex.updateExecutionState(ExecutionState.ASSIGNED);
			replayVertices.add(outputVertex);
			position++;
		}

		deployAssignedVertices(replayVertices);
	}

	/**
	 * Provides access to the vertices which shall be restarted once they have reached the <code>CANCELED</code>
	 * state. The returned object is the scheduler's internal map, not a copy.
	 * 
	 * @return the map of vertices to be restarted
	 */
	Map<ExecutionVertexID, ExecutionVertex> getVerticesToBeRestarted() {
		return verticesToBeRestarted;
	}


	/**
	 * Handles the notification that resources allocated for the job with the given ID have died.
	 * <p>
	 * A command is submitted to the execution graph of the job which, while holding the monitor of the graph, moves
	 * the vertices of every dead resource to a fresh dummy resource of the same instance type and marks those vertices
	 * as <code>FAILED</code> which were in state <code>ASSIGNED</code>, <code>READY</code>, <code>STARTING</code>,
	 * <code>RUNNING</code> or <code>FINISHING</code>. Unless the job is failing or has failed afterwards, instances
	 * are requested again for the current execution stage.
	 * 
	 * @param jobID
	 *        the ID of the job the dead resources belonged to
	 * @param allocatedResources
	 *        the resources which have died
	 */
	@Override
	public void allocatedResourcesDied(final JobID jobID, final List<AllocatedResource> allocatedResources) {

		final ExecutionGraph eg = getExecutionGraphByID(jobID);

		if (eg == null) {
			LOG.error("Cannot find execution graph for job with ID " + jobID);
			return;
		}

		final Runnable command = new Runnable() {

			/**
			 * {@inheritDoc}
			 */
			@Override
			public void run() {

				synchronized (eg) {

					for (final AllocatedResource allocatedResource : allocatedResources) {

						LOG.info("Resource " + allocatedResource.getInstance().getName() + " for Job " + jobID
							+ " died.");

						// The job may have been removed from the scheduler in the meantime
						if (getExecutionGraphByID(jobID) == null) {
							LOG.error("Cannot find execution graph for job " + jobID);
							return;
						}

						/*
						 * Take a snapshot of the affected vertices first. Re-assigning a vertex may detach it
						 * from the dead resource, so reading the vertices from the resource again after the
						 * re-assignment could miss the very vertices which have to be marked as failed.
						 */
						final List<ExecutionVertex> affectedVertices = new ArrayList<ExecutionVertex>();
						final Iterator<ExecutionVertex> vertexIter = allocatedResource.assignedVertices();
						while (vertexIter.hasNext()) {
							affectedVertices.add(vertexIter.next());
						}

						// Assign vertices back to a dummy resource.
						final DummyInstance dummyInstance = DummyInstance.createDummyInstance(allocatedResource
							.getInstance()
							.getType());
						final AllocatedResource dummyResource = new AllocatedResource(dummyInstance,
							allocatedResource.getInstanceType(), new AllocationID());

						for (final ExecutionVertex vertex : affectedVertices) {
							vertex.setAllocatedResource(dummyResource);
						}

						final String failureMessage = allocatedResource.getInstance().getName() + " died";

						for (final ExecutionVertex vertex : affectedVertices) {

							switch (vertex.getExecutionState()) {
							case ASSIGNED:
							case READY:
							case STARTING:
							case RUNNING:
							case FINISHING:
								vertex.updateExecutionState(ExecutionState.FAILED, failureMessage);
								break;
							default:
								// Vertices in any other state are left untouched
								break;
							}
						}

						// TODO: Fix this - instances should be re-requested for the stage of the affected vertices
					}
				}

				final InternalJobStatus js = eg.getJobStatus();
				if (js != InternalJobStatus.FAILING && js != InternalJobStatus.FAILED) {

					// TODO: Fix this
					// deployAssignedVertices(eg);

					final ExecutionStage stage = eg.getCurrentExecutionStage();

					try {
						requestInstances(stage);
					} catch (InstanceException e) {
						LOG.error(StringUtils.stringifyException(e));
						// TODO: Cancel the entire job in this case
					}
				}
			}
		};

		eg.executeCommand(command);
	}
}