/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.yarn.server.nodemanager.containermanager.container; import java.io.IOException; import java.net.URISyntaxException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationContainerFinishedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.LocalResourceRequest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ContainerLocalizationCleanupEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ContainerLocalizationRequestEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerContainerFinishedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerStartMonitoringEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainerStopMonitoringEvent; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.state.InvalidStateTransitonException; import org.apache.hadoop.yarn.state.MultipleArcTransition; import org.apache.hadoop.yarn.state.SingleArcTransition; import org.apache.hadoop.yarn.state.StateMachine; import org.apache.hadoop.yarn.state.StateMachineFactory; import org.apache.hadoop.yarn.util.ConverterUtils; public class ContainerImpl implements Container { private final Lock readLock; private final Lock writeLock; private final Dispatcher dispatcher; private final Credentials credentials; private final NodeManagerMetrics metrics; private final ContainerLaunchContext launchContext; private final ContainerTokenIdentifier containerTokenIdentifier; private final ContainerId containerId; private final Resource resource; private final String user; private int exitCode = ContainerExitStatus.INVALID; private final StringBuilder diagnostics; /** The NM-wide configuration - not specific to this container */ private final Configuration daemonConf; private static final Log LOG = LogFactory.getLog(Container.class); private final Map<LocalResourceRequest,List<String>> pendingResources = new HashMap<LocalResourceRequest,List<String>>(); private final Map<Path,List<String>> localizedResources = new HashMap<Path,List<String>>(); private final List<LocalResourceRequest> publicRsrcs = new ArrayList<LocalResourceRequest>(); private final List<LocalResourceRequest> privateRsrcs = new ArrayList<LocalResourceRequest>(); private final List<LocalResourceRequest> appRsrcs = new ArrayList<LocalResourceRequest>(); public ContainerImpl(Configuration conf, Dispatcher dispatcher, ContainerLaunchContext launchContext, Credentials creds, NodeManagerMetrics metrics, ContainerTokenIdentifier containerTokenIdentifier) { this.daemonConf = conf; this.dispatcher = dispatcher; this.launchContext = launchContext; this.containerTokenIdentifier = containerTokenIdentifier; this.containerId = containerTokenIdentifier.getContainerID(); this.resource = containerTokenIdentifier.getResource(); this.diagnostics = new StringBuilder(); this.credentials = creds; this.metrics = metrics; user = containerTokenIdentifier.getApplicationSubmitter(); ReadWriteLock readWriteLock = new ReentrantReadWriteLock(); this.readLock = readWriteLock.readLock(); this.writeLock = readWriteLock.writeLock(); stateMachine = stateMachineFactory.make(this); } private static final ContainerDoneTransition CONTAINER_DONE_TRANSITION = new ContainerDoneTransition(); private static final ContainerDiagnosticsUpdateTransition UPDATE_DIAGNOSTICS_TRANSITION = new ContainerDiagnosticsUpdateTransition(); // State Machine for each container. private static StateMachineFactory <ContainerImpl, ContainerState, ContainerEventType, ContainerEvent> stateMachineFactory = new StateMachineFactory<ContainerImpl, ContainerState, ContainerEventType, ContainerEvent>(ContainerState.NEW) // From NEW State .addTransition(ContainerState.NEW, EnumSet.of(ContainerState.LOCALIZING, ContainerState.LOCALIZED, ContainerState.LOCALIZATION_FAILED), ContainerEventType.INIT_CONTAINER, new RequestResourcesTransition()) .addTransition(ContainerState.NEW, ContainerState.NEW, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.NEW, ContainerState.DONE, ContainerEventType.KILL_CONTAINER, CONTAINER_DONE_TRANSITION) // From LOCALIZING State .addTransition(ContainerState.LOCALIZING, EnumSet.of(ContainerState.LOCALIZING, ContainerState.LOCALIZED), ContainerEventType.RESOURCE_LOCALIZED, new LocalizedTransition()) .addTransition(ContainerState.LOCALIZING, ContainerState.LOCALIZATION_FAILED, ContainerEventType.RESOURCE_FAILED, new ResourceFailedTransition()) .addTransition(ContainerState.LOCALIZING, ContainerState.LOCALIZING, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.LOCALIZING, ContainerState.KILLING, ContainerEventType.KILL_CONTAINER, new KillDuringLocalizationTransition()) // From LOCALIZATION_FAILED State .addTransition(ContainerState.LOCALIZATION_FAILED, ContainerState.DONE, ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP, CONTAINER_DONE_TRANSITION) .addTransition(ContainerState.LOCALIZATION_FAILED, ContainerState.LOCALIZATION_FAILED, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) // container not launched so kill is a no-op .addTransition(ContainerState.LOCALIZATION_FAILED, ContainerState.LOCALIZATION_FAILED, ContainerEventType.KILL_CONTAINER) // container cleanup triggers a release of all resources // regardless of whether they were localized or not // LocalizedResource handles release event in all states .addTransition(ContainerState.LOCALIZATION_FAILED, ContainerState.LOCALIZATION_FAILED, ContainerEventType.RESOURCE_LOCALIZED) .addTransition(ContainerState.LOCALIZATION_FAILED, ContainerState.LOCALIZATION_FAILED, ContainerEventType.RESOURCE_FAILED) // From LOCALIZED State .addTransition(ContainerState.LOCALIZED, ContainerState.RUNNING, ContainerEventType.CONTAINER_LAUNCHED, new LaunchTransition()) .addTransition(ContainerState.LOCALIZED, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, new ExitedWithFailureTransition(true)) .addTransition(ContainerState.LOCALIZED, ContainerState.LOCALIZED, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.LOCALIZED, ContainerState.KILLING, ContainerEventType.KILL_CONTAINER, new KillTransition()) // From RUNNING State .addTransition(ContainerState.RUNNING, ContainerState.EXITED_WITH_SUCCESS, ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS, new ExitedWithSuccessTransition(true)) .addTransition(ContainerState.RUNNING, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, new ExitedWithFailureTransition(true)) .addTransition(ContainerState.RUNNING, ContainerState.RUNNING, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.RUNNING, ContainerState.KILLING, ContainerEventType.KILL_CONTAINER, new KillTransition()) .addTransition(ContainerState.RUNNING, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.CONTAINER_KILLED_ON_REQUEST, new KilledExternallyTransition()) // From CONTAINER_EXITED_WITH_SUCCESS State .addTransition(ContainerState.EXITED_WITH_SUCCESS, ContainerState.DONE, ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP, CONTAINER_DONE_TRANSITION) .addTransition(ContainerState.EXITED_WITH_SUCCESS, ContainerState.EXITED_WITH_SUCCESS, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.EXITED_WITH_SUCCESS, ContainerState.EXITED_WITH_SUCCESS, ContainerEventType.KILL_CONTAINER) // From EXITED_WITH_FAILURE State .addTransition(ContainerState.EXITED_WITH_FAILURE, ContainerState.DONE, ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP, CONTAINER_DONE_TRANSITION) .addTransition(ContainerState.EXITED_WITH_FAILURE, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.EXITED_WITH_FAILURE, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.KILL_CONTAINER) // From KILLING State. .addTransition(ContainerState.KILLING, ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, ContainerEventType.CONTAINER_KILLED_ON_REQUEST, new ContainerKilledTransition()) .addTransition(ContainerState.KILLING, ContainerState.KILLING, ContainerEventType.RESOURCE_LOCALIZED, new LocalizedResourceDuringKillTransition()) .addTransition(ContainerState.KILLING, ContainerState.KILLING, ContainerEventType.RESOURCE_FAILED) .addTransition(ContainerState.KILLING, ContainerState.KILLING, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.KILLING, ContainerState.KILLING, ContainerEventType.KILL_CONTAINER) .addTransition(ContainerState.KILLING, ContainerState.EXITED_WITH_SUCCESS, ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS, new ExitedWithSuccessTransition(false)) .addTransition(ContainerState.KILLING, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, new ExitedWithFailureTransition(false)) .addTransition(ContainerState.KILLING, ContainerState.DONE, ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP, CONTAINER_DONE_TRANSITION) // Handle a launched container during killing stage is a no-op // as cleanup container is always handled after launch container event // in the container launcher .addTransition(ContainerState.KILLING, ContainerState.KILLING, ContainerEventType.CONTAINER_LAUNCHED) // From CONTAINER_CLEANEDUP_AFTER_KILL State. .addTransition(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, ContainerState.DONE, ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP, CONTAINER_DONE_TRANSITION) .addTransition(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, ContainerEventType.KILL_CONTAINER) // From DONE .addTransition(ContainerState.DONE, ContainerState.DONE, ContainerEventType.KILL_CONTAINER) .addTransition(ContainerState.DONE, ContainerState.DONE, ContainerEventType.INIT_CONTAINER) .addTransition(ContainerState.DONE, ContainerState.DONE, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) // This transition may result when // we notify container of failed localization if localizer thread (for // that container) fails for some reason .addTransition(ContainerState.DONE, ContainerState.DONE, ContainerEventType.RESOURCE_FAILED) // create the topology tables .installTopology(); private final StateMachine<ContainerState, ContainerEventType, ContainerEvent> stateMachine; private org.apache.hadoop.yarn.api.records.ContainerState getCurrentState() { switch (stateMachine.getCurrentState()) { case NEW: case LOCALIZING: case LOCALIZATION_FAILED: case LOCALIZED: case RUNNING: case EXITED_WITH_SUCCESS: case EXITED_WITH_FAILURE: case KILLING: case CONTAINER_CLEANEDUP_AFTER_KILL: case CONTAINER_RESOURCES_CLEANINGUP: return org.apache.hadoop.yarn.api.records.ContainerState.RUNNING; case DONE: default: return org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE; } } @Override public String getUser() { this.readLock.lock(); try { return this.user; } finally { this.readLock.unlock(); } } @Override public Map<Path,List<String>> getLocalizedResources() { this.readLock.lock(); try { if (ContainerState.LOCALIZED == getContainerState()) { return localizedResources; } else { return null; } } finally { this.readLock.unlock(); } } @Override public Credentials getCredentials() { this.readLock.lock(); try { return credentials; } finally { this.readLock.unlock(); } } @Override public ContainerState getContainerState() { this.readLock.lock(); try { return stateMachine.getCurrentState(); } finally { this.readLock.unlock(); } } @Override public ContainerLaunchContext getLaunchContext() { this.readLock.lock(); try { return launchContext; } finally { this.readLock.unlock(); } } @Override public ContainerStatus cloneAndGetContainerStatus() { this.readLock.lock(); try { return BuilderUtils.newContainerStatus(this.containerId, getCurrentState(), diagnostics.toString(), exitCode); } finally { this.readLock.unlock(); } } @Override public ContainerId getContainerId() { return this.containerId; } @Override public Resource getResource() { return this.resource; } @Override public ContainerTokenIdentifier getContainerTokenIdentifier() { this.readLock.lock(); try { return this.containerTokenIdentifier; } finally { this.readLock.unlock(); } } @SuppressWarnings({"fallthrough", "unchecked"}) private void finished() { ApplicationId applicationId = containerId.getApplicationAttemptId().getApplicationId(); switch (getContainerState()) { case EXITED_WITH_SUCCESS: metrics.endRunningContainer(); metrics.completedContainer(); NMAuditLogger.logSuccess(user, AuditConstants.FINISH_SUCCESS_CONTAINER, "ContainerImpl", applicationId, containerId); break; case EXITED_WITH_FAILURE: metrics.endRunningContainer(); // fall through case LOCALIZATION_FAILED: metrics.failedContainer(); NMAuditLogger.logFailure(user, AuditConstants.FINISH_FAILED_CONTAINER, "ContainerImpl", "Container failed with state: " + getContainerState(), applicationId, containerId); break; case CONTAINER_CLEANEDUP_AFTER_KILL: metrics.endRunningContainer(); // fall through case NEW: metrics.killedContainer(); NMAuditLogger.logSuccess(user, AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl", applicationId, containerId); } metrics.releaseContainer(this.resource); // Inform the application @SuppressWarnings("rawtypes") EventHandler eventHandler = dispatcher.getEventHandler(); eventHandler.handle(new ApplicationContainerFinishedEvent(containerId)); // Remove the container from the resource-monitor eventHandler.handle(new ContainerStopMonitoringEvent(containerId)); // Tell the logService too eventHandler.handle(new LogHandlerContainerFinishedEvent( containerId, exitCode)); } @SuppressWarnings("unchecked") // dispatcher not typed public void cleanup() { Map<LocalResourceVisibility, Collection<LocalResourceRequest>> rsrc = new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>(); if (!publicRsrcs.isEmpty()) { rsrc.put(LocalResourceVisibility.PUBLIC, publicRsrcs); } if (!privateRsrcs.isEmpty()) { rsrc.put(LocalResourceVisibility.PRIVATE, privateRsrcs); } if (!appRsrcs.isEmpty()) { rsrc.put(LocalResourceVisibility.APPLICATION, appRsrcs); } dispatcher.getEventHandler().handle( new ContainerLocalizationCleanupEvent(this, rsrc)); } static class ContainerTransition implements SingleArcTransition<ContainerImpl, ContainerEvent> { @Override public void transition(ContainerImpl container, ContainerEvent event) { // Just drain the event and change the state. } } /** * State transition when a NEW container receives the INIT_CONTAINER * message. * * If there are resources to localize, sends a * ContainerLocalizationRequest (INIT_CONTAINER_RESOURCES) * to the ResourceLocalizationManager and enters LOCALIZING state. * * If there are no resources to localize, sends LAUNCH_CONTAINER event * and enters LOCALIZED state directly. * * If there are any invalid resources specified, enters LOCALIZATION_FAILED * directly. */ @SuppressWarnings("unchecked") // dispatcher not typed static class RequestResourcesTransition implements MultipleArcTransition<ContainerImpl,ContainerEvent,ContainerState> { @Override public ContainerState transition(ContainerImpl container, ContainerEvent event) { final ContainerLaunchContext ctxt = container.launchContext; container.metrics.initingContainer(); container.dispatcher.getEventHandler().handle(new AuxServicesEvent (AuxServicesEventType.CONTAINER_INIT, container)); // Inform the AuxServices about the opaque serviceData Map<String,ByteBuffer> csd = ctxt.getServiceData(); if (csd != null) { // This can happen more than once per Application as each container may // have distinct service data for (Map.Entry<String,ByteBuffer> service : csd.entrySet()) { container.dispatcher.getEventHandler().handle( new AuxServicesEvent(AuxServicesEventType.APPLICATION_INIT, container.user, container.containerId .getApplicationAttemptId().getApplicationId(), service.getKey().toString(), service.getValue())); } } // Send requests for public, private resources Map<String,LocalResource> cntrRsrc = ctxt.getLocalResources(); if (!cntrRsrc.isEmpty()) { try { for (Map.Entry<String,LocalResource> rsrc : cntrRsrc.entrySet()) { try { LocalResourceRequest req = new LocalResourceRequest(rsrc.getValue()); List<String> links = container.pendingResources.get(req); if (links == null) { links = new ArrayList<String>(); container.pendingResources.put(req, links); } links.add(rsrc.getKey()); switch (rsrc.getValue().getVisibility()) { case PUBLIC: container.publicRsrcs.add(req); break; case PRIVATE: container.privateRsrcs.add(req); break; case APPLICATION: container.appRsrcs.add(req); break; } } catch (URISyntaxException e) { LOG.info("Got exception parsing " + rsrc.getKey() + " and value " + rsrc.getValue()); throw e; } } } catch (URISyntaxException e) { // malformed resource; abort container launch LOG.warn("Failed to parse resource-request", e); container.cleanup(); container.metrics.endInitingContainer(); return ContainerState.LOCALIZATION_FAILED; } Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req = new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>(); if (!container.publicRsrcs.isEmpty()) { req.put(LocalResourceVisibility.PUBLIC, container.publicRsrcs); } if (!container.privateRsrcs.isEmpty()) { req.put(LocalResourceVisibility.PRIVATE, container.privateRsrcs); } if (!container.appRsrcs.isEmpty()) { req.put(LocalResourceVisibility.APPLICATION, container.appRsrcs); } container.dispatcher.getEventHandler().handle( new ContainerLocalizationRequestEvent(container, req)); return ContainerState.LOCALIZING; } else { container.dispatcher.getEventHandler().handle( new ContainersLauncherEvent(container, ContainersLauncherEventType.LAUNCH_CONTAINER)); container.metrics.endInitingContainer(); return ContainerState.LOCALIZED; } } } /** * Transition when one of the requested resources for this container * has been successfully localized. */ @SuppressWarnings("unchecked") // dispatcher not typed static class LocalizedTransition implements MultipleArcTransition<ContainerImpl,ContainerEvent,ContainerState> { @Override public ContainerState transition(ContainerImpl container, ContainerEvent event) { ContainerResourceLocalizedEvent rsrcEvent = (ContainerResourceLocalizedEvent) event; List<String> syms = container.pendingResources.remove(rsrcEvent.getResource()); if (null == syms) { LOG.warn("Localized unknown resource " + rsrcEvent.getResource() + " for container " + container.containerId); assert false; // fail container? return ContainerState.LOCALIZING; } container.localizedResources.put(rsrcEvent.getLocation(), syms); if (!container.pendingResources.isEmpty()) { return ContainerState.LOCALIZING; } container.dispatcher.getEventHandler().handle( new ContainersLauncherEvent(container, ContainersLauncherEventType.LAUNCH_CONTAINER)); container.metrics.endInitingContainer(); return ContainerState.LOCALIZED; } } /** * Transition from LOCALIZED state to RUNNING state upon receiving * a CONTAINER_LAUNCHED event */ @SuppressWarnings("unchecked") // dispatcher not typed static class LaunchTransition extends ContainerTransition { @Override public void transition(ContainerImpl container, ContainerEvent event) { // Inform the ContainersMonitor to start monitoring the container's // resource usage. long pmemBytes = container.getResource().getMemory() * 1024 * 1024L; float pmemRatio = container.daemonConf.getFloat( YarnConfiguration.NM_VMEM_PMEM_RATIO, YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); long vmemBytes = (long) (pmemRatio * pmemBytes); container.dispatcher.getEventHandler().handle( new ContainerStartMonitoringEvent(container.containerId, vmemBytes, pmemBytes)); container.metrics.runningContainer(); } } /** * Transition from RUNNING or KILLING state to EXITED_WITH_SUCCESS state * upon EXITED_WITH_SUCCESS message. */ @SuppressWarnings("unchecked") // dispatcher not typed static class ExitedWithSuccessTransition extends ContainerTransition { boolean clCleanupRequired; public ExitedWithSuccessTransition(boolean clCleanupRequired) { this.clCleanupRequired = clCleanupRequired; } @Override public void transition(ContainerImpl container, ContainerEvent event) { // Set exit code to 0 on success container.exitCode = 0; // TODO: Add containerWorkDir to the deletion service. if (clCleanupRequired) { container.dispatcher.getEventHandler().handle( new ContainersLauncherEvent(container, ContainersLauncherEventType.CLEANUP_CONTAINER)); } container.cleanup(); } } /** * Transition to EXITED_WITH_FAILURE state upon * CONTAINER_EXITED_WITH_FAILURE state. **/ @SuppressWarnings("unchecked") // dispatcher not typed static class ExitedWithFailureTransition extends ContainerTransition { boolean clCleanupRequired; public ExitedWithFailureTransition(boolean clCleanupRequired) { this.clCleanupRequired = clCleanupRequired; } @Override public void transition(ContainerImpl container, ContainerEvent event) { ContainerExitEvent exitEvent = (ContainerExitEvent) event; container.exitCode = exitEvent.getExitCode(); // TODO: Add containerWorkDir to the deletion service. // TODO: Add containerOuputDir to the deletion service. if (clCleanupRequired) { container.dispatcher.getEventHandler().handle( new ContainersLauncherEvent(container, ContainersLauncherEventType.CLEANUP_CONTAINER)); } container.cleanup(); } } /** * Transition to EXITED_WITH_FAILURE upon receiving KILLED_ON_REQUEST */ static class KilledExternallyTransition extends ExitedWithFailureTransition { KilledExternallyTransition() { super(true); } @Override public void transition(ContainerImpl container, ContainerEvent event) { super.transition(container, event); container.diagnostics.append("Killed by external signal\n"); } } /** * Transition from LOCALIZING to LOCALIZATION_FAILED upon receiving * RESOURCE_FAILED event. */ static class ResourceFailedTransition implements SingleArcTransition<ContainerImpl, ContainerEvent> { @Override public void transition(ContainerImpl container, ContainerEvent event) { ContainerResourceFailedEvent rsrcFailedEvent = (ContainerResourceFailedEvent) event; container.diagnostics.append(rsrcFailedEvent.getDiagnosticMessage() + "\n"); // Inform the localizer to decrement reference counts and cleanup // resources. container.cleanup(); container.metrics.endInitingContainer(); } } /** * Transition from LOCALIZING to KILLING upon receiving * KILL_CONTAINER event. */ static class KillDuringLocalizationTransition implements SingleArcTransition<ContainerImpl, ContainerEvent> { @Override public void transition(ContainerImpl container, ContainerEvent event) { // Inform the localizer to decrement reference counts and cleanup // resources. container.cleanup(); container.metrics.endInitingContainer(); ContainerKillEvent killEvent = (ContainerKillEvent) event; container.diagnostics.append(killEvent.getDiagnostic()).append("\n"); } } /** * Remain in KILLING state when receiving a RESOURCE_LOCALIZED request * while in the process of killing. */ static class LocalizedResourceDuringKillTransition implements SingleArcTransition<ContainerImpl, ContainerEvent> { @Override public void transition(ContainerImpl container, ContainerEvent event) { ContainerResourceLocalizedEvent rsrcEvent = (ContainerResourceLocalizedEvent) event; List<String> syms = container.pendingResources.remove(rsrcEvent.getResource()); if (null == syms) { LOG.warn("Localized unknown resource " + rsrcEvent.getResource() + " for container " + container.containerId); assert false; // fail container? return; } container.localizedResources.put(rsrcEvent.getLocation(), syms); } } /** * Transitions upon receiving KILL_CONTAINER: * - LOCALIZED -> KILLING * - RUNNING -> KILLING */ @SuppressWarnings("unchecked") // dispatcher not typed static class KillTransition implements SingleArcTransition<ContainerImpl, ContainerEvent> { @Override public void transition(ContainerImpl container, ContainerEvent event) { // Kill the process/process-grp container.dispatcher.getEventHandler().handle( new ContainersLauncherEvent(container, ContainersLauncherEventType.CLEANUP_CONTAINER)); ContainerKillEvent killEvent = (ContainerKillEvent) event; container.diagnostics.append(killEvent.getDiagnostic()).append("\n"); } } /** * Transition from KILLING to CONTAINER_CLEANEDUP_AFTER_KILL * upon receiving CONTAINER_KILLED_ON_REQUEST. */ static class ContainerKilledTransition implements SingleArcTransition<ContainerImpl, ContainerEvent> { @Override public void transition(ContainerImpl container, ContainerEvent event) { ContainerExitEvent exitEvent = (ContainerExitEvent) event; container.exitCode = exitEvent.getExitCode(); // The process/process-grp is killed. Decrement reference counts and // cleanup resources container.cleanup(); } } /** * Handle the following transitions: * - NEW -> DONE upon KILL_CONTAINER * - {LOCALIZATION_FAILED, EXITED_WITH_SUCCESS, EXITED_WITH_FAILURE, * KILLING, CONTAINER_CLEANEDUP_AFTER_KILL} * -> DONE upon CONTAINER_RESOURCES_CLEANEDUP */ static class ContainerDoneTransition implements SingleArcTransition<ContainerImpl, ContainerEvent> { @Override @SuppressWarnings("unchecked") public void transition(ContainerImpl container, ContainerEvent event) { container.finished(); //if the current state is NEW it means the CONTAINER_INIT was never // sent for the event, thus no need to send the CONTAINER_STOP if (container.getCurrentState() != org.apache.hadoop.yarn.api.records.ContainerState.NEW) { container.dispatcher.getEventHandler().handle(new AuxServicesEvent (AuxServicesEventType.CONTAINER_STOP, container)); } } } /** * Update diagnostics, staying in the same state. */ static class ContainerDiagnosticsUpdateTransition implements SingleArcTransition<ContainerImpl, ContainerEvent> { @Override public void transition(ContainerImpl container, ContainerEvent event) { ContainerDiagnosticsUpdateEvent updateEvent = (ContainerDiagnosticsUpdateEvent) event; container.diagnostics.append(updateEvent.getDiagnosticsUpdate()) .append("\n"); } } @Override public void handle(ContainerEvent event) { try { this.writeLock.lock(); ContainerId containerID = event.getContainerID(); LOG.debug("Processing " + containerID + " of type " + event.getType()); ContainerState oldState = stateMachine.getCurrentState(); ContainerState newState = null; try { newState = stateMachine.doTransition(event.getType(), event); } catch (InvalidStateTransitonException e) { LOG.warn("Can't handle this event at current state: Current: [" + oldState + "], eventType: [" + event.getType() + "]", e); } if (oldState != newState) { LOG.info("Container " + containerID + " transitioned from " + oldState + " to " + newState); } } finally { this.writeLock.unlock(); } } @Override public String toString() { this.readLock.lock(); try { return ConverterUtils.toString(this.containerId); } finally { this.readLock.unlock(); } } }