package org.apache.hadoop.corona;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.StringUtils;
/**
 * Manages the collection of sessions known to the {@link ClusterManager}.
 *
 * <p>Live sessions are kept in {@link #sessions}, keyed by session handle.
 * Sessions that currently have pending resource requests are additionally
 * indexed in {@link #runnableSessions}. Deleted sessions are summarized as
 * {@link RetiredSession} objects in a bounded queue.
 *
 * <p>The constructor starts two daemon threads: one that times out sessions
 * whose last heartbeat is older than the configured expiry interval, and one
 * that periodically publishes per-resource-type request and slot counts to
 * the cluster manager metrics.
 */
public class SessionManager implements Configurable {

  public static final Log LOG = LogFactory.getLog(SessionManager.class);

  /** Pattern used to derive the session id prefix from the start time. */
  private static final String DATE_FORMAT_PATTERN = "yyyyMMddHHmm";

  /** Pause between two runs of the metrics updater, in milliseconds. */
  private static final long METRICS_UPDATE_PERIOD_MSEC = 5000L;

  /**
   * How long the expiry thread idles while no positive expiry interval has
   * been configured yet, in milliseconds. {@link #setConf} interrupts the
   * thread, so a new configuration is picked up without waiting this long.
   */
  private static final long UNCONFIGURED_EXPIRY_POLL_MSEC = 1000L;

  /** Upper bound on the number of retired sessions that are remembered. */
  public static final int MAX_RETIRED_SESSIONS = 1000;

  protected CoronaConf conf;
  protected ClusterManager clusterManager;

  /** Source of the numeric suffix of generated session ids. */
  protected AtomicLong sessionCounter = new AtomicLong();

  /** Session expiry interval in milliseconds; assigned by {@link #setConf}. */
  protected static int sessionExpiryInterval;

  protected Thread expireSessionsThread = null;
  protected ExpireSessions expireSessions = new ExpireSessions();

  protected Thread metricsUpdaterThread;
  protected MetricsUpdater metricsUpdater = new MetricsUpdater();

  /** Checked by both background threads as their loop condition. */
  protected volatile boolean shutdown = false;

  /** Formatted construction time; prefix of every session id. */
  protected String startTime;

  // 1: primary data structure
  protected ConcurrentMap<String, Session> sessions
    = new ConcurrentHashMap<String, Session>();

  // 2: list of all the sessions who need compute resources right now
  protected ConcurrentMap<String, Session> runnableSessions
    = new ConcurrentHashMap<String, Session>();

  /** Summaries of deleted sessions, oldest first; guarded by its own monitor. */
  protected ArrayDeque<RetiredSession> retiredSessions =
    new ArrayDeque<RetiredSession>();

  /**
   * Creates the manager and starts its two daemon background threads.
   *
   * @param clusterManager the cluster manager this session manager reports to
   */
  public SessionManager(ClusterManager clusterManager) {
    DateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT_PATTERN);
    this.startTime = dateFormat.format(new Date());
    this.clusterManager = clusterManager;

    this.expireSessionsThread = new Thread(this.expireSessions,
                                           "expireSessions");
    this.expireSessionsThread.setDaemon(true);
    this.expireSessionsThread.start();

    this.metricsUpdaterThread = new Thread(this.metricsUpdater,
                                           "SessionManager metrics");
    this.metricsUpdaterThread.setDaemon(true);
    this.metricsUpdaterThread.start();
  }

  /**
   * @return the key set of the live session map, i.e. the handles of all
   *         sessions that have been added and not yet deleted
   */
  public Set<String> getSessions() {
    return sessions.keySet();
  }

  /**
   * Looks up a live session by its handle.
   *
   * @param handle the session handle
   * @return the session registered under the handle
   * @throws InvalidSessionHandle if the handle is null or no live session is
   *         registered under it
   */
  public Session getSession(String handle) throws InvalidSessionHandle {
    // ConcurrentHashMap rejects null keys with a NullPointerException;
    // report a null handle like any other unknown handle instead.
    Session session = (handle == null) ? null : sessions.get(handle);
    if (session == null) {
      throw new InvalidSessionHandle(handle);
    }
    return session;
  }

  /**
   * @return a new list holding the sessions that are currently indexed as
   *         having pending requests
   */
  public List<Session> getRunnableSessions() {
    return new ArrayList<Session>(runnableSessions.values());
  }

  /**
   * Registers a new session, updates the session metrics and hands the
   * session to the scheduler.
   *
   * @param info description of the session being created
   * @return the generated session id, of the form
   *         {@code <startTime>.<counter>}
   */
  public String addSession(SessionInfo info) {
    String sessionId = startTime + "." + sessionCounter.incrementAndGet();
    Session session = new Session(sessionId, info);
    sessions.put(sessionId, session);

    clusterManager.getMetrics().sessionStart();
    clusterManager.getMetrics().setNumRunningSessions(sessions.size());
    clusterManager.getScheduler().addSession(sessionId, session);

    LOG.info("Add Session " +
             sessionId + " -> " +
             info.getName() + "@" +
             info.getAddress().getHost() + ":" +
             info.getAddress().getPort());
    return sessionId;
  }

  /**
   * Replaces the info of a live session.
   *
   * @param handle the session handle
   * @param info the new session info
   * @throws InvalidSessionHandle if the session is unknown or already deleted
   */
  public void updateInfo(String handle, SessionInfo info)
    throws InvalidSessionHandle {
    Session session = getSession(handle);
    synchronized (session) {
      if (session.deleted) {
        throw new InvalidSessionHandle(handle);
      }
      session.updateInfo(info);
    }
  }

  /**
   * Marks a session as deleted, removes it from the live and runnable
   * indices, updates the metrics and records it as retired.
   *
   * @param handle the session handle
   * @param status the final status to record on the session
   * @return the value of {@code session.getGrants()} for the deleted session
   * @throws InvalidSessionHandle if the session is unknown or already deleted
   */
  public Collection<ResourceGrant> deleteSession(String handle, SessionStatus status)
    throws InvalidSessionHandle {
    Session session = getSession(handle);
    synchronized (session) {
      if (session.deleted) {
        throw new InvalidSessionHandle(handle);
      }
      session.deleted = true;
      session.status = status;
      sessions.remove(session.sessionId);
      clusterManager.getMetrics().setNumRunningSessions(sessions.size());
      clusterManager.getMetrics().sessionEnd(status);
      runnableSessions.remove(session.sessionId);
      retireSession(session);
    }
    return session.getGrants();
  }

  /**
   * Forwards a heartbeat to the session.
   *
   * @param handle the session handle
   * @throws InvalidSessionHandle if no live session has this handle
   */
  public void heartbeat(String handle) throws InvalidSessionHandle {
    getSession(handle).heartbeat();
  }

  /**
   * Adds resource requests to a session and indexes the session as runnable
   * if it went from having no pending requests to having some.
   *
   * @param handle the session handle
   * @param requestList the requests to add
   * @throws InvalidSessionHandle if the session is unknown or already deleted
   */
  public void requestResource(String handle, List<ResourceRequest> requestList)
    throws InvalidSessionHandle {
    Session session = getSession(handle);
    synchronized (session) {
      if (session.deleted) {
        throw new InvalidSessionHandle(handle);
      }
      int previousPending = session.getPendingRequestCount();
      session.requestResource(requestList);
      if (previousPending <= 0 && session.getPendingRequestCount() > 0) {
        runnableSessions.put(session.sessionId, session);
      }
    }
  }

  /**
   * Releases the requests with the given ids and drops the session from the
   * runnable index if nothing is pending afterwards.
   *
   * @param handle the session handle
   * @param idList ids of the requests to release
   * @return the grants returned by {@code session.releaseResource(idList)}
   * @throws InvalidSessionHandle if the session is unknown or already deleted
   */
  public Collection<ResourceGrant> releaseResource(String handle, List<Integer> idList)
    throws InvalidSessionHandle {
    Session session = getSession(handle);
    synchronized (session) {
      if (session.deleted) {
        throw new InvalidSessionHandle(handle);
      }
      List<ResourceGrant> canceledGrants = session.releaseResource(idList);
      if (session.getPendingRequestCount() <= 0) {
        runnableSessions.remove(session.sessionId);
      }
      return canceledGrants;
    }
  }

  /**
   * Revokes the grants for the given request ids and indexes the session as
   * runnable if it went from having no pending requests to having some.
   *
   * @param handle the session handle
   * @param idList ids of the requests whose grants are revoked
   * @return the grants returned by {@code session.revokeResource(idList)}
   * @throws InvalidSessionHandle if the session is unknown or already deleted
   */
  public List<ResourceGrant> revokeResource(String handle, List<Integer> idList)
    throws InvalidSessionHandle {
    Session session = getSession(handle);
    synchronized (session) {
      if (session.deleted) {
        throw new InvalidSessionHandle(handle);
      }
      int previousPending = session.getPendingRequestCount();
      List<ResourceGrant> canceledGrants = session.revokeResource(idList);
      if (previousPending <= 0 && session.getPendingRequestCount() > 0) {
        runnableSessions.put(session.sessionId, session);
      }
      return canceledGrants;
    }
  }

  /**
   * Unlike other api's defined by the SessionManager - this one is invoked by
   * the scheduler when it already has a lock on the session and has a valid
   * session handle. The call is routed through the SessionManager to make sure
   * that any indices/views maintained on top of the sessions are maintained
   * accurately.
   *
   * @param session the session receiving the grant
   * @param req the request being satisfied
   * @param grant the grant satisfying the request
   */
  public void grantResource(Session session, ResourceRequest req, ResourceGrant grant) {
    session.grantResource(req, grant);
    if (session.getPendingRequestCount() <= 0) {
      runnableSessions.remove(session.sessionId);
    }
  }

  /**
   * Stores the configuration, reads the session expiry interval from it and
   * interrupts the expiry thread so that it stops sleeping on the old value.
   *
   * @param conf the configuration; cast to {@link CoronaConf}
   */
  public void setConf(Configuration conf) {
    this.conf = (CoronaConf) conf;
    sessionExpiryInterval = this.conf.getSessionExpiryInterval();
    if (this.expireSessionsThread != null) {
      this.expireSessionsThread.interrupt();
    }
  }

  public Configuration getConf() {
    return conf;
  }

  /**
   * @param type the resource type
   * @return the sum of {@code getRequestCountForType(type)} over all live,
   *         non-deleted sessions
   */
  public int getRequestCountForType(String type) {
    int total = 0;
    for (Session session : sessions.values()) {
      synchronized (session) {
        if (!session.deleted) {
          total += session.getRequestCountForType(type);
        }
      }
    }
    return total;
  }

  /**
   * @param type the resource type
   * @return the total number of pending requests of the type over all live,
   *         non-deleted sessions
   */
  public int getPendingRequestCountForType(String type) {
    int total = 0;
    for (Session session : sessions.values()) {
      synchronized (session) {
        if (!session.deleted) {
          total += session.getPendingRequestForType(type).size();
        }
      }
    }
    return total;
  }

  /** @return the number of entries in the live session map */
  public int getRunningSessionCount() {
    return sessions.size();
  }

  /**
   * Background task that periodically pushes pending/running request counts
   * and total/free slot counts for every resource type to the metrics.
   */
  class MetricsUpdater implements Runnable {
    @Override
    public void run() {
      while (!shutdown) {
        try {
          Thread.sleep(METRICS_UPDATE_PERIOD_MSEC);
          NodeManager nm = clusterManager.getNodeManager();
          ClusterManagerMetrics metrics = clusterManager.getMetrics();
          for (String resourceType : clusterManager.getTypes()) {
            int pending = getPendingRequestCountForType(resourceType);
            int running = getRequestCountForType(resourceType) - pending;
            int totalSlots = nm.getMaxCpuForType(resourceType);
            int freeSlots = totalSlots - nm.getAllocatedCpuForType(resourceType);
            metrics.setPendingRequestCount(resourceType, pending);
            metrics.setRunningRequestCount(resourceType, running);
            metrics.setTotalSlots(resourceType, totalSlots);
            metrics.setFreeSlots(resourceType, freeSlots);
          }
        } catch (InterruptedException iex) {
          // ignore. if shutting down, while cond. will catch it
        } catch (Exception t) {
          LOG.error("Session Metrics Updater Thread got exception: " +
                    StringUtils.stringifyException(t));
        }
      }
    }
  }

  /**
   * Background task that ends, with status {@code TIMED_OUT}, every session
   * whose last heartbeat is older than the session expiry interval.
   */
  class ExpireSessions implements Runnable {
    public ExpireSessions() {
    }

    @Override
    public void run() {
      while (!shutdown) {
        try {
          // Read the interval once per pass so the sleep and the comparison
          // below use the same value.
          int expiryInterval = sessionExpiryInterval;
          if (expiryInterval <= 0) {
            // The thread is started by the constructor, before setConf() has
            // supplied an interval. Idle instead of spinning on sleep(0) and
            // expiring every session with a positive heartbeat gap.
            Thread.sleep(UNCONFIGURED_EXPIRY_POLL_MSEC);
            continue;
          }
          Thread.sleep(expiryInterval / 2);

          long now = ClusterManager.clock.getTime();
          for (Session session : sessions.values()) {
            long gap = now - session.lastHeartbeatTime;
            if (gap > expiryInterval) {
              LOG.warn("Timing out session: " + session.getName() +
                       " after a heartbeat gap of " + gap + " msec");
              try {
                clusterManager.sessionEnd(session.getHandle(), SessionStatus.TIMED_OUT);
              } catch (Exception e) {
                // Best effort: keep expiring the remaining sessions, but do
                // not hide the failure.
                LOG.warn("Failed to end timed out session " +
                         session.sessionId, e);
              }
            }
          }
        } catch (InterruptedException iex) {
          // ignore. if shutting down, while cond. will catch it.
          // Not re-interrupting on purpose: setConf() uses an interrupt only
          // to wake this thread up so it re-reads the interval.
        } catch (Exception t) {
          LOG.error("Session Expiry Thread got exception: " +
                    StringUtils.stringifyException(t));
        }
      }
    }
  }

  /**
   * Records a summary of the session in the retired queue, evicting the
   * oldest entries so that at most {@link #MAX_RETIRED_SESSIONS} are kept.
   *
   * @param session the session that has been deleted
   */
  protected void retireSession(Session session) {
    synchronized (retiredSessions) {
      // Make room before adding so the queue never exceeds the cap.
      while (retiredSessions.size() >= MAX_RETIRED_SESSIONS) {
        retiredSessions.remove();
      }
      retiredSessions.add(new RetiredSession(session));
    }
  }

  /**
   * @return a snapshot of the retired sessions, oldest first; the returned
   *         collection is a copy and is not affected by later retirements
   */
  public Collection<RetiredSession> getRetiredSessions() {
    // ArrayDeque is not thread-safe; copy under the same lock that
    // retireSession() mutates it under so callers can iterate safely.
    synchronized (retiredSessions) {
      return new ArrayList<RetiredSession>(retiredSessions);
    }
  }
}