/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.yarn.server.resourcemanager.recovery; import java.nio.ByteBuffer; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import javax.crypto.SecretKey; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.DelegationKey; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationAttemptStateDataPBImpl; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppStoredEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRemovedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStoredEvent; @Private @Unstable /** * Base class to implement storage of ResourceManager state. * Takes care of asynchronous notifications and interfacing with YARN objects. * Real store implementations need to derive from it and implement blocking * store and load methods to actually store and load the state. */ public abstract class RMStateStore extends AbstractService { public static final Log LOG = LogFactory.getLog(RMStateStore.class); public RMStateStore() { super(RMStateStore.class.getName()); } /** * State of an application attempt */ public static class ApplicationAttemptState { final ApplicationAttemptId attemptId; final Container masterContainer; final Credentials appAttemptCredentials; public ApplicationAttemptState(ApplicationAttemptId attemptId, Container masterContainer, Credentials appAttemptCredentials) { this.attemptId = attemptId; this.masterContainer = masterContainer; this.appAttemptCredentials = appAttemptCredentials; } public Container getMasterContainer() { return masterContainer; } public ApplicationAttemptId getAttemptId() { return attemptId; } public Credentials getAppAttemptCredentials() { return appAttemptCredentials; } } /** * State of an application application */ public static class ApplicationState { final ApplicationSubmissionContext context; final long submitTime; final String user; Map<ApplicationAttemptId, ApplicationAttemptState> attempts = new HashMap<ApplicationAttemptId, ApplicationAttemptState>(); ApplicationState(long submitTime, ApplicationSubmissionContext context, String user) { this.submitTime = submitTime; this.context = context; this.user = user; } public ApplicationId getAppId() { return context.getApplicationId(); } public long getSubmitTime() { return submitTime; } public int getAttemptCount() { return attempts.size(); } public ApplicationSubmissionContext getApplicationSubmissionContext() { return context; } public ApplicationAttemptState getAttempt(ApplicationAttemptId attemptId) { return attempts.get(attemptId); } public String getUser() { return user; } } public static class RMDTSecretManagerState { // DTIdentifier -> renewDate Map<RMDelegationTokenIdentifier, Long> delegationTokenState = new HashMap<RMDelegationTokenIdentifier, Long>(); Set<DelegationKey> masterKeyState = new HashSet<DelegationKey>(); int dtSequenceNumber = 0; public Map<RMDelegationTokenIdentifier, Long> getTokenState() { return delegationTokenState; } public Set<DelegationKey> getMasterKeyState() { return masterKeyState; } public int getDTSequenceNumber() { return dtSequenceNumber; } } /** * State of the ResourceManager */ public static class RMState { Map<ApplicationId, ApplicationState> appState = new HashMap<ApplicationId, ApplicationState>(); RMDTSecretManagerState rmSecretManagerState = new RMDTSecretManagerState(); public Map<ApplicationId, ApplicationState> getApplicationState() { return appState; } public RMDTSecretManagerState getRMDTSecretManagerState() { return rmSecretManagerState; } } private Dispatcher rmDispatcher; /** * Dispatcher used to send state operation completion events to * ResourceManager services */ public void setRMDispatcher(Dispatcher dispatcher) { this.rmDispatcher = dispatcher; } AsyncDispatcher dispatcher; public synchronized void serviceInit(Configuration conf) throws Exception{ // create async handler dispatcher = new AsyncDispatcher(); dispatcher.init(conf); dispatcher.register(RMStateStoreEventType.class, new ForwardingEventHandler()); initInternal(conf); } protected synchronized void serviceStart() throws Exception { dispatcher.start(); startInternal(); } /** * Derived classes initialize themselves using this method. */ protected abstract void initInternal(Configuration conf) throws Exception; /** * Derived classes start themselves using this method. * The base class is started and the event dispatcher is ready to use at * this point */ protected abstract void startInternal() throws Exception; public synchronized void serviceStop() throws Exception { closeInternal(); dispatcher.stop(); } /** * Derived classes close themselves using this method. * The base class will be closed and the event dispatcher will be shutdown * after this */ protected abstract void closeInternal() throws Exception; /** * Blocking API * The derived class must recover state from the store and return a new * RMState object populated with that state * This must not be called on the dispatcher thread */ public abstract RMState loadState() throws Exception; /** * Non-Blocking API * ResourceManager services use this to store the application's state * This does not block the dispatcher threads * RMAppStoredEvent will be sent on completion to notify the RMApp */ @SuppressWarnings("unchecked") public synchronized void storeApplication(RMApp app) { ApplicationSubmissionContext context = app .getApplicationSubmissionContext(); assert context instanceof ApplicationSubmissionContextPBImpl; ApplicationState appState = new ApplicationState( app.getSubmitTime(), context, app.getUser()); dispatcher.getEventHandler().handle(new RMStateStoreAppEvent(appState)); } /** * Blocking API * Derived classes must implement this method to store the state of an * application. */ protected abstract void storeApplicationState(String appId, ApplicationStateDataPBImpl appStateData) throws Exception; @SuppressWarnings("unchecked") /** * Non-blocking API * ResourceManager services call this to store state on an application attempt * This does not block the dispatcher threads * RMAppAttemptStoredEvent will be sent on completion to notify the RMAppAttempt */ public synchronized void storeApplicationAttempt(RMAppAttempt appAttempt) { Credentials credentials = getCredentialsFromAppAttempt(appAttempt); ApplicationAttemptState attemptState = new ApplicationAttemptState(appAttempt.getAppAttemptId(), appAttempt.getMasterContainer(), credentials); dispatcher.getEventHandler().handle( new RMStateStoreAppAttemptEvent(attemptState)); } /** * Blocking API * Derived classes must implement this method to store the state of an * application attempt */ protected abstract void storeApplicationAttemptState(String attemptId, ApplicationAttemptStateDataPBImpl attemptStateData) throws Exception; /** * RMDTSecretManager call this to store the state of a delegation token * and sequence number */ public synchronized void storeRMDelegationTokenAndSequenceNumber( RMDelegationTokenIdentifier rmDTIdentifier, Long renewDate, int latestSequenceNumber) throws Exception { storeRMDelegationTokenAndSequenceNumberState(rmDTIdentifier, renewDate, latestSequenceNumber); } /** * Blocking API * Derived classes must implement this method to store the state of * RMDelegationToken and sequence number */ protected abstract void storeRMDelegationTokenAndSequenceNumberState( RMDelegationTokenIdentifier rmDTIdentifier, Long renewDate, int latestSequenceNumber) throws Exception; /** * RMDTSecretManager call this to remove the state of a delegation token */ public synchronized void removeRMDelegationToken( RMDelegationTokenIdentifier rmDTIdentifier, int sequenceNumber) throws Exception { removeRMDelegationTokenState(rmDTIdentifier); } /** * Blocking API * Derived classes must implement this method to remove the state of RMDelegationToken */ protected abstract void removeRMDelegationTokenState( RMDelegationTokenIdentifier rmDTIdentifier) throws Exception; /** * RMDTSecretManager call this to store the state of a master key */ public synchronized void storeRMDTMasterKey(DelegationKey delegationKey) throws Exception { storeRMDTMasterKeyState(delegationKey); } /** * Blocking API * Derived classes must implement this method to store the state of * DelegationToken Master Key */ protected abstract void storeRMDTMasterKeyState(DelegationKey delegationKey) throws Exception; /** * RMDTSecretManager call this to remove the state of a master key */ public synchronized void removeRMDTMasterKey(DelegationKey delegationKey) throws Exception { removeRMDTMasterKeyState(delegationKey); } /** * Blocking API * Derived classes must implement this method to remove the state of * DelegationToken Master Key */ protected abstract void removeRMDTMasterKeyState(DelegationKey delegationKey) throws Exception; /** * Non-blocking API * ResourceManager services call this to remove an application from the state * store * This does not block the dispatcher threads * There is no notification of completion for this operation. */ public synchronized void removeApplication(RMApp app) { ApplicationState appState = new ApplicationState( app.getSubmitTime(), app.getApplicationSubmissionContext(), app.getUser()); for(RMAppAttempt appAttempt : app.getAppAttempts().values()) { Credentials credentials = getCredentialsFromAppAttempt(appAttempt); ApplicationAttemptState attemptState = new ApplicationAttemptState(appAttempt.getAppAttemptId(), appAttempt.getMasterContainer(), credentials); appState.attempts.put(attemptState.getAttemptId(), attemptState); } removeApplication(appState); } @SuppressWarnings("unchecked") /** * Non-Blocking API */ public synchronized void removeApplication(ApplicationState appState) { dispatcher.getEventHandler().handle(new RMStateStoreRemoveAppEvent(appState)); } /** * Blocking API * Derived classes must implement this method to remove the state of an * application and its attempts */ protected abstract void removeApplicationState(ApplicationState appState) throws Exception; // TODO: This should eventually become cluster-Id + "AM_RM_TOKEN_SERVICE". See // YARN-986 public static final Text AM_RM_TOKEN_SERVICE = new Text( "AM_RM_TOKEN_SERVICE"); public static final Text AM_CLIENT_TOKEN_MASTER_KEY_NAME = new Text("YARN_CLIENT_TOKEN_MASTER_KEY"); private Credentials getCredentialsFromAppAttempt(RMAppAttempt appAttempt) { Credentials credentials = new Credentials(); Token<AMRMTokenIdentifier> appToken = appAttempt.getAMRMToken(); if(appToken != null){ credentials.addToken(AM_RM_TOKEN_SERVICE, appToken); } SecretKey clientTokenMasterKey = appAttempt.getClientTokenMasterKey(); if(clientTokenMasterKey != null){ credentials.addSecretKey(AM_CLIENT_TOKEN_MASTER_KEY_NAME, clientTokenMasterKey.getEncoded()); } return credentials; } // Dispatcher related code private synchronized void handleStoreEvent(RMStateStoreEvent event) { switch(event.getType()) { case STORE_APP: { ApplicationState apptState = ((RMStateStoreAppEvent) event).getAppState(); Exception storedException = null; ApplicationStateDataPBImpl appStateData = new ApplicationStateDataPBImpl(); appStateData.setSubmitTime(apptState.getSubmitTime()); appStateData.setApplicationSubmissionContext( apptState.getApplicationSubmissionContext()); appStateData.setUser(apptState.getUser()); ApplicationId appId = apptState.getApplicationSubmissionContext().getApplicationId(); LOG.info("Storing info for app: " + appId); try { storeApplicationState(appId.toString(), appStateData); } catch (Exception e) { LOG.error("Error storing app: " + appId, e); storedException = e; } finally { notifyDoneStoringApplication(appId, storedException); } } break; case STORE_APP_ATTEMPT: { ApplicationAttemptState attemptState = ((RMStateStoreAppAttemptEvent) event).getAppAttemptState(); Exception storedException = null; Credentials credentials = attemptState.getAppAttemptCredentials(); ByteBuffer appAttemptTokens = null; try { if(credentials != null){ DataOutputBuffer dob = new DataOutputBuffer(); credentials.writeTokenStorageToStream(dob); appAttemptTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); } ApplicationAttemptStateDataPBImpl attemptStateData = (ApplicationAttemptStateDataPBImpl) ApplicationAttemptStateDataPBImpl .newApplicationAttemptStateData(attemptState.getAttemptId(), attemptState.getMasterContainer(), appAttemptTokens); LOG.info("Storing info for attempt: " + attemptState.getAttemptId()); storeApplicationAttemptState(attemptState.getAttemptId().toString(), attemptStateData); } catch (Exception e) { LOG.error("Error storing appAttempt: " + attemptState.getAttemptId(), e); storedException = e; } finally { notifyDoneStoringApplicationAttempt(attemptState.getAttemptId(), storedException); } } break; case REMOVE_APP: { ApplicationState appState = ((RMStateStoreRemoveAppEvent) event).getAppState(); ApplicationId appId = appState.getAppId(); Exception removedException = null; LOG.info("Removing info for app: " + appId); try { removeApplicationState(appState); } catch (Exception e) { LOG.error("Error removing app: " + appId, e); removedException = e; } finally { notifyDoneRemovingApplcation(appId, removedException); } } break; default: LOG.error("Unknown RMStateStoreEvent type: " + event.getType()); } } @SuppressWarnings("unchecked") /** * In (@link handleStoreEvent}, this method is called to notify the * application about operation completion * @param appId id of the application that has been saved * @param storedException the exception that is thrown when storing the * application */ private void notifyDoneStoringApplication(ApplicationId appId, Exception storedException) { rmDispatcher.getEventHandler().handle( new RMAppStoredEvent(appId, storedException)); } @SuppressWarnings("unchecked") /** * In (@link handleStoreEvent}, this method is called to notify the * application attempt about operation completion * @param appAttempt attempt that has been saved */ private void notifyDoneStoringApplicationAttempt(ApplicationAttemptId attemptId, Exception storedException) { rmDispatcher.getEventHandler().handle( new RMAppAttemptStoredEvent(attemptId, storedException)); } @SuppressWarnings("unchecked") /** * This is to notify RMApp that this application has been removed from * RMStateStore */ private void notifyDoneRemovingApplcation(ApplicationId appId, Exception removedException) { rmDispatcher.getEventHandler().handle( new RMAppRemovedEvent(appId, removedException)); } /** * EventHandler implementation which forward events to the FSRMStateStore * This hides the EventHandle methods of the store from its public interface */ private final class ForwardingEventHandler implements EventHandler<RMStateStoreEvent> { @Override public void handle(RMStateStoreEvent event) { handleStoreEvent(event); } } }