/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.corona;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.http.HttpServer;
import org.apache.hadoop.mapred.Clock;
import org.apache.hadoop.net.NetUtils;
import org.apache.thrift.TApplicationException;
import org.apache.thrift.TException;
public class ClusterManager implements ClusterManagerService.Iface {
public static final Log LOG =
LogFactory.getLog(ClusterManager.class);
public static Clock clock = new Clock();
// node manager manages collections of nodes
protected NodeManager nodeManager;
// session manager manages collections of sessions
protected SessionManager sessionManager;
// http server
protected HttpServer infoServer;
// the scheduler service matches free nodes to runnable sessions
protected Scheduler scheduler;
// the session notifier asynchronously notifies sessions about various events
protected SessionNotifier sessionNotifier;
protected ClusterManagerMetrics metrics;
protected CoronaConf conf;
// a bunch of variables for building UI
protected long startTime;
protected String hostName;
protected Map<String, Object> legalTypes =
new IdentityHashMap<String, Object> (256);
protected void initLegalTypes() {
Map<Integer, Map<String, Integer>> cpuToResourcePartitioning =
conf.getCpuToResourcePartitioning();
for(Map.Entry<Integer, Map<String, Integer>> entry:
cpuToResourcePartitioning.entrySet()) {
for (String type: entry.getValue().keySet()) {
legalTypes.put(type.intern(), this);
}
}
legalTypes = Collections.unmodifiableMap(legalTypes);
}
public ClusterManager() {
// provided only to help testing
}
public ClusterManager(Configuration conf) throws IOException {
this(new CoronaConf(conf));
}
public ClusterManager(CoronaConf conf) throws IOException {
this.conf = conf;
initLegalTypes();
metrics = new ClusterManagerMetrics(getTypes());
sessionManager = new SessionManager(this);
sessionManager.setConf(conf);
nodeManager = new NodeManager(this);
nodeManager.setConf(conf);
sessionNotifier = new SessionNotifier(sessionManager, this, metrics);
sessionNotifier.setConf(conf);
scheduler = new Scheduler(nodeManager, sessionManager,
sessionNotifier, getTypes());
scheduler.setConf(conf);
scheduler.start();
InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(conf.getClusterManagerHttpAddress());
infoServer = new HttpServer("cm", infoSocAddr.getHostName(), infoSocAddr.getPort(),
infoSocAddr.getPort() == 0, conf);
infoServer.setAttribute("cm", this);
infoServer.start();
startTime = clock.getTime();
hostName = infoSocAddr.getHostName();
}
public ClusterManagerMetrics getMetrics() {
return metrics;
}
public SessionNotifier getSessionNotifier() {
return sessionNotifier;
}
public SessionManager getSessionManager() {
return sessionManager;
}
public NodeManager getNodeManager() {
return nodeManager;
}
public Scheduler getScheduler() {
return scheduler;
}
public Collection<String> getTypes() {
return Collections.unmodifiableCollection(legalTypes.keySet());
}
@Override
public SessionRegistrationData sessionStart(SessionInfo info) throws TException {
return new SessionRegistrationData(
sessionManager.addSession(info), new ClusterManagerInfo("", ""));
}
@Override
public void sessionEnd(String handle, SessionStatus status) throws TException, InvalidSessionHandle {
try {
LOG.info("sessionEnd called for session: " + handle + " with status: " + status);
Collection<ResourceGrant> canceledGrants = sessionManager.deleteSession(handle, status);
if (canceledGrants == null) {
return;
}
for(ResourceGrant grant: canceledGrants) {
nodeManager.cancelGrant(grant.nodeName, handle, grant.id);
metrics.releaseResource(grant.type);
}
scheduler.notifyScheduler();
sessionNotifier.deleteSession(handle);
} catch (RuntimeException e) {
throw new TApplicationException(e.getMessage());
}
}
@Override
public void sessionUpdateInfo(String handle, SessionInfo info) throws TException, InvalidSessionHandle {
try {
LOG.info("sessionUpdateInfo called for session: " + handle + " with info: " + info);
sessionManager.updateInfo(handle, info);
} catch (RuntimeException e) {
throw new TApplicationException(e.getMessage());
}
}
@Override
public void sessionHeartbeat(String handle) throws TException, InvalidSessionHandle {
try {
sessionManager.heartbeat(handle);
} catch (RuntimeException e) {
throw new TApplicationException(e.getMessage());
}
}
/**
* canonicalize strings used in maps so that we can use cheaper identitymaps
*/
protected void canonicalizeResourceRequest(List<ResourceRequest> requestList) {
for(ResourceRequest req: requestList) {
req.type = req.type.intern();
}
}
protected boolean checkResourceRequestType(
List<ResourceRequest> requestList) {
for(ResourceRequest req: requestList) {
if (legalTypes.get(req.type) == null)
return false;
}
return true;
}
protected boolean checkResourceRequestExcluded(
List<ResourceRequest> requestList) {
Set<String> excluded = new HashSet<String>();
for(ResourceRequest req: requestList) {
if (req.getExcludeHosts() == null ||
req.getHosts() == null) {
continue;
}
excluded.clear();
excluded.addAll(req.getExcludeHosts());
for (String host : req.getHosts()) {
if (excluded.contains(host)) {
return false;
}
}
}
return true;
}
@Override
public void requestResource(String handle, List<ResourceRequest> requestList)
throws TException, InvalidSessionHandle {
try {
LOG.info ("Request " + requestList.size() +
" resources from session: " + handle);
canonicalizeResourceRequest(requestList);
if (!checkResourceRequestType(requestList)) {
LOG.error ("Bad resource type from session: " + handle);
throw new TApplicationException("Bad resource type");
}
if (!checkResourceRequestExcluded(requestList)) {
LOG.error ("Bad excluded hosts from session: " + handle);
throw new TApplicationException("Requesting excluded hosts");
}
sessionManager.requestResource(handle, requestList);
for (ResourceRequest req : requestList) {
metrics.requestResource(req.type);
}
scheduler.notifyScheduler();
} catch (RuntimeException e) {
e.printStackTrace();
throw new TApplicationException(e.getMessage());
}
}
@Override
public void releaseResource(String handle, List<Integer> idList)
throws TException, InvalidSessionHandle {
try {
LOG.info ("Release " + idList.size() + " resources from session: " + handle);
Collection<ResourceGrant> canceledGrants =
sessionManager.releaseResource(handle, idList);
if (canceledGrants == null) {
// LOG.info("No canceled grants for session " + handle);
return;
}
for(ResourceGrant grant: canceledGrants) {
nodeManager.cancelGrant(grant.nodeName, handle, grant.id);
metrics.releaseResource(grant.type);
}
scheduler.notifyScheduler();
} catch (RuntimeException e) {
throw new TApplicationException(e.getMessage());
}
}
@Override
public void nodeHeartbeat(ClusterNodeInfo node)
throws TException {
//LOG.info("heartbeat from node: " + node.toString());
if (nodeManager.heartbeat(node))
scheduler.notifyScheduler();
}
/**
* This is an internal api called to tell the cluster manager that a
* a particular node seems dysfunctional and that it should be removed
* from the cluster
*/
public void nodeTimeout(String nodeName) {
Set<ClusterNode.GrantId> grantsToRevoke = nodeManager.deleteNode(nodeName);
if (grantsToRevoke == null)
return;
for(ClusterNode.GrantId grantId: grantsToRevoke) {
String sessionHandle = grantId.sessionId;
try {
List<ResourceGrant> revokedGrants =
sessionManager.revokeResource(sessionHandle,
Collections.singletonList(grantId.requestId));
if ((revokedGrants != null) && !revokedGrants.isEmpty()) {
sessionNotifier.notifyRevokeResource(sessionHandle, revokedGrants, false);
}
} catch (InvalidSessionHandle e) {
// ignore
LOG.warn("Found invalid session: " + sessionHandle + " while timing out node: " + nodeName);
}
}
scheduler.notifyScheduler();
}
public long getStartTime() {
return startTime;
}
public String getHostName() {
return hostName;
}
}