/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.corona;

import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.CoronaSerializer;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.JsonToken;

/**
 * Manages a collection of sessions: tracks live and runnable sessions,
 * retires completed ones, expires sessions whose heartbeats stop, and
 * persists/restores session state across ClusterManager restarts.
 */
public class SessionManager implements Configurable {
  private static final Log LOG = LogFactory.getLog(SessionManager.class);
  /** Pattern used to render the CM start time into session id prefixes. */
  private static final String DATE_FORMAT_PATTERN = "yyyyMMddHHmm";

  /** Recently deleted sessions, bounded by numRetiredSessions; guarded by
   *  its own monitor (see retireSession). */
  private ArrayDeque<RetiredSession> retiredSessions =
    new ArrayDeque<RetiredSession>();
  private CoronaConf conf;
  private ClusterManager clusterManager;
  /** Monotonic counter used to build unique session ids. */
  private AtomicLong sessionCounter = new AtomicLong();
  /**
   * The number of resource requests/releases to process under the
   * session lock. Not configurable for now.
   */
  private int requestBatchSize = 1000;
  private int sessionExpiryInterval;
  private int numRetiredSessions;
  private Thread expireSessionsThread = null;
  private ExpireSessions expireSessions = new ExpireSessions();
  private Thread metricsUpdaterThread;
  private MetricsUpdater metricsUpdater = new MetricsUpdater();
  /** Read by the daemon loops; volatile so a shutdown is seen promptly. */
  private volatile boolean shutdown = false;
  /** Formatted CM start time; prefix of every session id issued here. */
  private String startTime;

  // 1: primary data structure
  private ConcurrentMap<String, Session> sessions =
    new ConcurrentHashMap<String, Session>();
  // 2: list of all the sessions who need compute resources right now
  private ConcurrentMap<String, Session> runnableSessions =
    new ConcurrentHashMap<String, Session>();

  /**
   * Constructor for SessionManager
   *
   * @param clusterManager The ClusterManager instance to be used
   */
  public SessionManager(ClusterManager clusterManager) {
    DateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT_PATTERN);
    this.startTime =
      dateFormat.format(new Date(clusterManager.getStartTime()));
    this.clusterManager = clusterManager;
    this.expireSessionsThread =
      new Thread(this.expireSessions, "expireSessions");
    this.expireSessionsThread.setDaemon(true);
    this.expireSessionsThread.start();
    this.metricsUpdaterThread =
      new Thread(this.metricsUpdater, "SessionManager metrics");
    this.metricsUpdaterThread.setDaemon(true);
    this.metricsUpdaterThread.start();
  }

  /**
   * Constructor for SessionManager, used when we are reading back the
   * ClusterManager state from the disk
   *
   * @param clusterManager The ClusterManager instance to be used
   * @param coronaSerializer The CoronaSerializer instance, which will be used
   *                         to read JSON from disk
   * @throws IOException
   */
  public SessionManager(ClusterManager clusterManager,
                        CoronaSerializer coronaSerializer)
    throws IOException {
    this(clusterManager);
    // Even though the expireSessions thread would be running now, it would
    // not expire any sessions we would be creating now, because the
    // ClusterManager would be in Safe Mode.

    // Expecting the START_OBJECT token for sessionManager
    coronaSerializer.readStartObjectToken("sessionManager");
    readSessions(coronaSerializer);
    coronaSerializer.readField("sessionCounter");
    sessionCounter =
      new AtomicLong(coronaSerializer.readValueAs(Long.class));
    // Expecting the END_OBJECT token for sessionManager
    coronaSerializer.readEndObjectToken("sessionManager");

    // Restoring the runnableSessions map
    for (String sessionId : sessions.keySet()) {
      Session session = sessions.get(sessionId);
      if (session.getPendingRequestCount() > 0) {
        runnableSessions.put(sessionId, session);
      }
    }
  }

  /**
   * Reads back the sessions map from a JSON stream
   *
   * @param coronaSerializer The CoronaSerializer instance to be used to
   *                         read the JSON
   * @throws IOException
   */
  private void readSessions(CoronaSerializer coronaSerializer)
    throws IOException {
    coronaSerializer.readField("sessions");
    // Expecting the START_OBJECT token for sessions
    coronaSerializer.readStartObjectToken("sessions");
    JsonToken current = coronaSerializer.nextToken();
    while (current != JsonToken.END_OBJECT) {
      String sessionId = coronaSerializer.getFieldName();
      Session session =
        new Session(clusterManager.conf.getCMHeartbeatDelayMax(),
                    coronaSerializer);
      sessions.put(sessionId, session);
      current = coronaSerializer.nextToken();
    }
    // Done with reading the END_OBJECT token for sessions
  }

  /**
   * This method rebuilds members related to the SessionManager instance,
   * which were not directly persisted themselves.
   */
  public void restoreAfterSafeModeRestart() {
    if (!clusterManager.safeMode) {
      return;
    }
    for (Session session : sessions.values()) {
      for (ResourceRequestInfo resourceRequestInfo :
             session.idToRequest.values()) {
        // The helper method to restore the ResourceRequestInfo instances
        // is placed in NodeManager because it makes use of other members
        // of NodeManager
        clusterManager.nodeManager.
          restoreResourceRequestInfo(resourceRequestInfo);
      }
      session.restoreAfterSafeModeRestart();
      clusterManager.getScheduler().addSession(session.getSessionId(),
                                               session);
    }
    clusterManager.getMetrics().setNumRunningSessions(sessions.size());
  }

  /**
   * Used to write the state of the SessionManager instance to disk, when we
   * are persisting the state of the ClusterManager
   * @param jsonGenerator The JsonGenerator instance being used to write JSON
   *                      to disk
   * @throws IOException
   */
  public void write(JsonGenerator jsonGenerator) throws IOException {
    jsonGenerator.writeStartObject();
    // retiredSessions and numRetiredSessions need not be persisted

    // sessionCounter can be set to 0, when the SessionManager is instantiated

    // sessions begins
    jsonGenerator.writeFieldName("sessions");
    jsonGenerator.writeStartObject();
    for (String sessionId : sessions.keySet()) {
      jsonGenerator.writeFieldName(sessionId);
      sessions.get(sessionId).write(jsonGenerator);
    }
    jsonGenerator.writeEndObject();
    // sessions ends

    jsonGenerator.writeNumberField("sessionCounter",
                                   sessionCounter.longValue());
    jsonGenerator.writeEndObject();
    // We can rebuild runnableSessions
    // No need to write startTime and numRetiredSessions
  }

  /** @return The ids of all currently live sessions. */
  public Set<String> getSessions() {
    return sessions.keySet();
  }

  /**
   * Helper class for getTypePoolInfoAveFirstWaitMs().
   */
  private static class WaitCount {
    /** Total waited msecs */
    private long totalWaitMsecs;
    /** Number of entries */
    private int count;

    /**
     * Constructor.
     * @param initialWaitMsecs Initial waited msecs
     */
    WaitCount(long initialWaitMsecs) {
      totalWaitMsecs = initialWaitMsecs;
      count = 1;
    }

    /**
     * Add wait msecs
     * @param waitMsecs Waited msecs
     */
    void addWaitMsecs(long waitMsecs) {
      totalWaitMsecs += waitMsecs;
      ++count;
    }

    /**
     * Get the average wait.
     * @return total wait msecs / count
     */
    long getAverageWait() {
      return totalWaitMsecs / count;
    }
  }

  /**
   * Get a map of pool infos to average wait times for first
   * resource of a resource type.
   *
   * @param type Resource type
   * @return Map of pools into average first resource time
   */
  public Map<PoolInfo, Long> getTypePoolInfoAveFirstWaitMs(
    ResourceType type) {
    Map<PoolInfo, WaitCount> poolInfoWaitCount =
      new HashMap<PoolInfo, WaitCount>();
    for (Session session : sessions.values()) {
      synchronized (session) {
        if (!session.isDeleted()) {
          Long wait = session.getTypeFirstWaitMs(type);
          if (wait == null) {
            continue;
          }
          WaitCount waitCount = poolInfoWaitCount.get(session.getPoolInfo());
          if (waitCount == null) {
            poolInfoWaitCount.put(session.getPoolInfo(),
                                  new WaitCount(wait));
          } else {
            waitCount.addWaitMsecs(wait);
          }
        }
      }
    }
    Map<PoolInfo, Long> poolInfoWaitMs =
      new HashMap<PoolInfo, Long>(poolInfoWaitCount.size());
    for (Map.Entry<PoolInfo, WaitCount> entry :
           poolInfoWaitCount.entrySet()) {
      poolInfoWaitMs.put(entry.getKey(), entry.getValue().getAverageWait());
    }
    return poolInfoWaitMs;
  }

  /**
   * Look up a live session by handle.
   *
   * @param handle The session handle
   * @return The matching Session
   * @throws InvalidSessionHandle If no live session has this handle
   */
  public Session getSession(String handle) throws InvalidSessionHandle {
    Session session = sessions.get(handle);
    if (session == null) {
      throw new InvalidSessionHandle(handle);
    }
    return session;
  }

  /** @return A snapshot copy of all sessions with pending requests. */
  public List<Session> getRunnableSessions() {
    List<Session> ret = new ArrayList<Session>(runnableSessions.size());
    ret.addAll(runnableSessions.values());
    return ret;
  }

  /**
   * Generate the next unique session id. Ids are prefixed with the CM start
   * time so ids from a previous CM incarnation can be rejected.
   *
   * @return A new session id
   */
  public String getNextSessionId() {
    String sessionId = startTime + "." + sessionCounter.incrementAndGet();
    return sessionId;
  }

  /**
   * Register a new session.
   *
   * @param sessionId The id previously issued by getNextSessionId()
   * @param info Client-supplied session metadata
   * @return The newly created Session
   * @throws InvalidSessionHandle If the id belongs to another CM start time
   *                              or is already registered
   */
  public Session addSession(String sessionId, SessionInfo info)
    throws InvalidSessionHandle {
    if (!sessionId.startsWith(startTime)) {
      throw new InvalidSessionHandle(
        "Session belongs to a different start time " + sessionId);
    }
    if (sessions.containsKey(sessionId)) {
      throw new InvalidSessionHandle("Session already started " + sessionId);
    }
    Session session =
      new Session(conf.getCMHeartbeatDelayMax(), sessionId, info,
                  clusterManager.getScheduler().getConfigManager());
    PoolGroupManager.checkPoolInfoIfStrict(
      session.getPoolInfo(),
      clusterManager.getScheduler().getConfigManager(),
      conf);
    sessions.put(sessionId, session);
    clusterManager.getMetrics().sessionStart();
    clusterManager.getMetrics().setNumRunningSessions(sessions.size());
    // sessionId is already a String; no conversion needed.
    clusterManager.getScheduler().addSession(sessionId, session);
    LOG.info("Add Session " + sessionId + " -> " + info.getName() + "@" +
             info.getAddress().getHost() + ":" +
             info.getAddress().getPort());
    return session;
  }

  /**
   * Update the mutable metadata (url/name, priority, deadline) of a session.
   *
   * @param handle The session handle
   * @param info The new metadata
   * @throws InvalidSessionHandle If the session is unknown or deleted
   */
  public void updateInfo(String handle, SessionInfo info)
    throws InvalidSessionHandle {
    Session session = getSession(handle);
    synchronized (session) {
      if (session.isDeleted()) {
        throw new InvalidSessionHandle(handle);
      }
      session.updateInfoUrlAndName(info.url, info.name);
      session.updateSessionPriority(info.priority);
      session.updateSessionDeadline(info.deadline);
    }
  }

  /**
   * Delete a session, remove it from all indices and retire it.
   *
   * @param handle The session handle
   * @param status The terminal status to record
   * @return The grants the session still held, for the caller to reclaim
   * @throws InvalidSessionHandle If the session is unknown or deleted
   */
  public Collection<ResourceGrant> deleteSession(String handle,
                                                 SessionStatus status)
    throws InvalidSessionHandle {
    Session session = getSession(handle);
    synchronized (session) {
      if (session.isDeleted()) {
        throw new InvalidSessionHandle(handle);
      }
      session.setDeleted();
      session.setStatus(status);
      sessions.remove(session.getSessionId());
      clusterManager.getNodeManager().deleteSession(handle);
      clusterManager.getMetrics().setNumRunningSessions(sessions.size());
      clusterManager.getMetrics().sessionEnd(status);
      runnableSessions.remove(session.getSessionId());
      retireSession(session);
    }
    return session.getGrants();
  }

  /**
   * Record a heartbeat for the session.
   *
   * @param handle The session handle
   * @throws InvalidSessionHandle If the session is unknown
   */
  public void heartbeat(String handle) throws InvalidSessionHandle {
    Session session = getSession(handle);
    session.heartbeat();
  }

  /**
   * Record a heartbeat carrying resource usage information.
   *
   * @param handle The session handle
   * @param jtInfo Heartbeat payload including resource usages
   * @throws InvalidSessionHandle If the session is unknown
   */
  public void heartbeatV2(String handle, HeartbeatArgs jtInfo)
    throws InvalidSessionHandle {
    Session session = getSession(handle);
    session.heartbeat();
    session.storeResourceUsages(jtInfo.resourceUsages);
  }

  /**
   * Add resource requests to a session, batching work under the session
   * lock, and mark the session runnable if it newly has pending requests.
   *
   * @param handle The session handle
   * @param requestList The requests to add
   * @throws InvalidSessionHandle If the session is unknown or deleted
   */
  public void requestResource(String handle,
                              List<ResourceRequestInfo> requestList)
    throws InvalidSessionHandle {
    Session session = getSession(handle);
    int listSize = requestList.size();
    // Limit the number of requests to process under the session lock.
    // This is required to prevent slow down of the scheduler threads, which
    // need to grab the session lock for all running sessions.
    for (int i = 0; i < listSize;) {
      int toIndex = Math.min(i + requestBatchSize, listSize);
      List<ResourceRequestInfo> toProcess = requestList.subList(i, toIndex);
      i = toIndex;
      synchronized (session) {
        if (session.isDeleted()) {
          throw new InvalidSessionHandle(handle);
        }
        int previousPending = session.getPendingRequestCount();
        session.requestResource(toProcess);
        if (previousPending <= 0 && (session.getPendingRequestCount() > 0)) {
          runnableSessions.put(session.getSessionId(), session);
        }
      }
    }
  }

  /**
   * Release resources held by a session, batching work under the session
   * lock, and drop the session from the runnable set if nothing is pending.
   *
   * @param handle The session handle
   * @param idList Ids of the resources to release
   * @return The grants canceled by the releases
   * @throws InvalidSessionHandle If the session is unknown or deleted
   */
  public Collection<ResourceGrant> releaseResource(String handle,
                                                   List<Integer> idList)
    throws InvalidSessionHandle {
    Session session = getSession(handle);
    List<ResourceGrant> canceledGrants = null;
    int listSize = idList.size();
    // Limit the number of releases to process under the session lock.
    // This is required to prevent slow down of the scheduler threads, which
    // need to grab the session lock for all running sessions.
    for (int i = 0; i < listSize;) {
      int toIndex = Math.min(i + requestBatchSize, listSize);
      List<Integer> toProcess = idList.subList(i, toIndex);
      i = toIndex;
      synchronized (session) {
        if (session.isDeleted()) {
          throw new InvalidSessionHandle(handle);
        }
        if (canceledGrants == null) {
          canceledGrants = session.releaseResource(toProcess);
        } else {
          canceledGrants.addAll(session.releaseResource(toProcess));
        }
        if (session.getPendingRequestCount() <= 0) {
          runnableSessions.remove(session.getSessionId());
        }
      }
    }
    return canceledGrants;
  }

  /**
   * Revoke granted resources from a session; the revoked requests become
   * pending again, so the session may re-enter the runnable set.
   *
   * @param handle The session handle
   * @param idList Ids of the grants to revoke
   * @return The grants that were canceled
   * @throws InvalidSessionHandle If the session is unknown or deleted
   */
  public List<ResourceGrant> revokeResource(String handle,
                                            List<Integer> idList)
    throws InvalidSessionHandle {
    Session session = getSession(handle);
    synchronized (session) {
      if (session.isDeleted()) {
        throw new InvalidSessionHandle(handle);
      }
      int previousPending = session.getPendingRequestCount();
      List<ResourceGrant> canceledGrants = session.revokeResource(idList);
      if (previousPending <= 0 && (session.getPendingRequestCount() > 0)) {
        runnableSessions.put(session.getSessionId(), session);
      }
      return canceledGrants;
    }
  }

  /**
   * Unlike other api's defined by the SessionManager - this one is invoked by
   * the scheduler when it already has a lock on the session and has a valid
   * session handle. The call is routed through the SessionManager to make
   * sure that any indices/views maintained on top of the sessions are
   * maintained accurately
   */
  public void grantResource(Session session, ResourceRequestInfo req,
                            ResourceGrant grant) {
    session.grantResource(req, grant);
    if (session.getPendingRequestCount() <= 0) {
      runnableSessions.remove(session.getSessionId());
    }
  }

  @Override
  public void setConf(Configuration conf) {
    this.conf = (CoronaConf) conf;
    sessionExpiryInterval = this.conf.getSessionExpiryInterval();
    numRetiredSessions = this.conf.getNumRetiredSessions();
    LOG.info("Will keep " + numRetiredSessions +
             " retired sessions in memory");
    // Wake up the expiry thread so it re-reads the (possibly changed)
    // expiry interval instead of finishing an old, stale sleep.
    if (this.expireSessionsThread != null) {
      this.expireSessionsThread.interrupt();
    }
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  /**
   * Total (pending + granted) request count across live sessions for a type.
   *
   * @param type Resource type
   * @return Sum of request counts
   */
  public int getRequestCountForType(ResourceType type) {
    int total = 0;
    for (Session session : sessions.values()) {
      synchronized (session) {
        if (session.isDeleted()) {
          continue;
        }
        total += session.getRequestCountForType(type);
      }
    }
    return total;
  }

  /**
   * Total grant count across live sessions for a type.
   *
   * @param type Resource type
   * @return Sum of grant counts
   */
  public int getGrantCountForType(ResourceType type) {
    int total = 0;
    for (Session session : sessions.values()) {
      synchronized (session) {
        if (!session.isDeleted()) {
          total += session.getGrantCountForType(type);
        }
      }
    }
    return total;
  }

  /**
   * Total pending request count across live sessions for a type.
   *
   * @param type Resource type
   * @return Sum of pending request counts
   */
  public int getPendingRequestCountForType(ResourceType type) {
    int total = 0;
    for (Session session : sessions.values()) {
      synchronized (session) {
        if (session.isDeleted()) {
          continue;
        }
        total += session.getPendingRequestForType(type).size();
      }
    }
    return total;
  }

  /** @return The number of currently live sessions. */
  public int getRunningSessionCount() {
    return sessions.size();
  }

  /**
   * Daemon loop that periodically pushes per-resource-type counts into the
   * ClusterManager metrics.
   */
  class MetricsUpdater implements Runnable {
    @Override
    public void run() {
      while (!shutdown) {
        try {
          Thread.sleep(5000);
          // If the ClusterManager is in Safe Mode, we do not need to update
          // the metrics
          if (clusterManager.safeMode) {
            continue;
          }
          NodeManager nm = clusterManager.getNodeManager();
          ClusterManagerMetrics metrics = clusterManager.getMetrics();
          for (ResourceType resourceType : clusterManager.getTypes()) {
            int pending = getPendingRequestCountForType(resourceType);
            int running = getRequestCountForType(resourceType) - pending;
            int totalSlots = nm.getMaxCpuForType(resourceType);
            int freeSlots =
              totalSlots - nm.getAllocatedCpuForType(resourceType);
            metrics.setPendingRequestCount(resourceType, pending);
            metrics.setRunningRequestCount(resourceType, running);
            metrics.setTotalSlots(resourceType, totalSlots);
            metrics.setFreeSlots(resourceType, freeSlots);
          }
        } catch (InterruptedException iex) {
          // ignore. if shutting down, while cond. will catch it
        }
      }
    }
  }

  /**
   * Daemon loop that times out sessions whose heartbeat gap exceeds the
   * configured expiry interval.
   */
  class ExpireSessions implements Runnable {
    @Override
    public void run() {
      while (!shutdown) {
        try {
          Thread.sleep(sessionExpiryInterval / 2);
          /**
           * If we are in safe mode, we should not expire any sessions, and
           * reset the last seen time before we come out of safe mode.
           */
          if (clusterManager.safeMode) {
            continue;
          }
          long now = ClusterManager.clock.getTime();
          for (Session session : sessions.values()) {
            long gap = now - session.getLastHeartbeatTime();
            if (gap > sessionExpiryInterval) {
              LOG.warn("Timing out session: " + session.getHandle() +
                       " (" + session.getName() + ") " +
                       "after a heartbeat gap of " + gap + " msec");
              try {
                clusterManager.sessionEnd(session.getHandle(),
                                          SessionStatus.TIMED_OUT);
              } catch (InvalidSessionHandle e) {
                LOG.warn("Ignoring error while expiring session " +
                         session.getHandle(), e);
              } catch (SafeModeException e) {
                // You could come here, if the safe mode is set while you are
                // in the for-loop.
                LOG.info(
                  "Got a SafeModeException in the Expire Sessions thread");
                // We need not loop any further.
                break;
              } catch (org.apache.thrift.TException e) {
                // Should not happen since we are making a function call,
                // not thrift call.
                LOG.warn("Ignoring error while expiring session " +
                         session.getHandle(), e);
              }
            }
          }
        } catch (InterruptedException iex) {
          // ignore. if shutting down, while cond. will catch it
          // (setConf also interrupts deliberately to re-read the interval,
          // so we must not restore the interrupt flag here)
        }
      }
    }
  }

  /**
   * Move a deleted session into the bounded retired-sessions history.
   *
   * @param session The session being retired
   */
  protected void retireSession(Session session) {
    synchronized (retiredSessions) {
      // Evict oldest entries first so that after the add the deque holds at
      // most numRetiredSessions entries. (Using '>' here would retain one
      // extra session beyond the configured limit.)
      while (retiredSessions.size() >= numRetiredSessions) {
        retiredSessions.remove();
      }
      retiredSessions.add(new RetiredSession(session));
    }
  }

  /**
   * @return The live retired-sessions collection.
   *         NOTE(review): this exposes the internal deque without
   *         synchronization; callers iterating it concurrently with
   *         retireSession() should synchronize on it — confirm call sites.
   */
  public Collection<RetiredSession> getRetiredSessions() {
    return retiredSessions;
  }

  /**
   * This is required when we come out of safe mode, and we need to reset
   * the lastHeartbeatTime for each session
   */
  public void resetSessionsLastHeartbeatTime() {
    for (Session session : sessions.values()) {
      session.heartbeat();
    }
  }
}