/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.corona;

import java.io.IOException;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.util.CoronaSerializer;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.JsonToken;

/**
 * Tracks the state of a single node in the cluster: its static information
 * ({@link ClusterNodeInfo}), the resources currently granted on it, and
 * per-resource-type usage statistics. Instances are created either from a
 * live node report or by restoring persisted NodeManager state from disk.
 */
public class ClusterNode {
  /** Class logger */
  public static final Log LOG = LogFactory.getLog(ClusterNode.class);
  /** Time (from ClusterManager.clock) of the most recent heartbeat. */
  public long lastHeartbeatTime;
  /** Set when the node is removed; subsequent grant operations fail fast. */
  public boolean deleted = false;
  // This is no longer a final because when we restart after an upgrade, we
  // will initialize the hostNode outside the constructor.
  public Node hostNode;
  /** Static node information (name, address, total/free specs). */
  private ClusterNodeInfo clusterNodeInfo;
  /** Latest free resources reported by the node; updated on heartbeat. */
  private volatile ComputeSpecs freeSpecs;
  /** Max CPUs usable per resource type, derived from the conf partitioning. */
  private Map<ResourceType, Integer> resourceTypeToMaxCpu =
      new EnumMap<ResourceType, Integer>(ResourceType.class);
  /** Per-resource-type allocation counters. */
  private Map<ResourceType, Stats> resourceTypeToStatsMap =
      new EnumMap<ResourceType, Stats>(ResourceType.class);
  /** Resources currently granted on this node, keyed by (session, request). */
  protected Map<GrantId, ResourceRequestInfo> grants =
      new HashMap<GrantId, ResourceRequestInfo>();
  /** Aggregate specs granted across all sessions on this node. */
  protected ComputeSpecs granted = new ComputeSpecs();

  /**
   * Per-resource-type usage counters. All integral fields get initialized
   * to 0.
   *
   * NOTE(review): the fields are volatile but mutated with compound
   * operations ({@code ++}, {@code +=}), which are not atomic. This assumes
   * the mutating paths run under external synchronization -- confirm against
   * the ClusterManager call sites.
   */
  public static class Stats {
    private volatile int allocatedCpu;
    private volatile int grantCount;
  }

  /**
   * Metadata about a granted resource on a node.
   */
  public static class GrantId {
    /** Session identifier */
    private final String sessionId;
    /** Request identifier */
    private final int requestId;
    /**
     * Unique name based on sessionId and requestId. Also used as the JSON
     * field name when grants are persisted, so the format must stay stable.
     * NOTE(review): concatenation without a separator can collide
     * (("a1", 2) vs ("a", 12)) if session ids have variable trailing
     * digits -- confirm session id format before relying on uniqueness.
     */
    private final String unique;

    /**
     * Constructor.
     *
     * @param sessionId Session id using this grant
     * @param requestId Grant id
     */
    public GrantId(String sessionId, int requestId) {
      this.sessionId = sessionId;
      this.requestId = requestId;
      this.unique = sessionId + requestId;
    }

    /**
     * Constructor for GrantId, used when we are reading back the state from
     * the disk. Field order must match {@link #write(JsonGenerator)}.
     *
     * @param coronaSerializer The CoronaSerializer instance being used to
     *                         read the JSON from disk
     * @throws IOException
     */
    public GrantId(CoronaSerializer coronaSerializer) throws IOException {
      // Expecting the START_OBJECT token for GrantId
      coronaSerializer.readStartObjectToken("GrantId");
      coronaSerializer.readField("sessionId");
      this.sessionId = coronaSerializer.readValueAs(String.class);
      coronaSerializer.readField("requestId");
      this.requestId = coronaSerializer.readValueAs(Integer.class);
      // Expecting the END_OBJECT token for GrantId
      coronaSerializer.readEndObjectToken("GrantId");
      this.unique = this.sessionId + this.requestId;
    }

    public String getSessionId() {
      return sessionId;
    }

    public int getRequestId() {
      return requestId;
    }

    @Override
    public int hashCode() {
      return unique.hashCode();
    }

    @Override
    public boolean equals(Object that) {
      if (that instanceof GrantId) {
        return this.equals((GrantId) that);
      }
      return false;
    }

    /**
     * Check if it equals another GrantId
     *
     * @param that Other GrandId
     * @return True if the same, false otherwise
     */
    public boolean equals(GrantId that) {
      if (that == null) {
        return false;
      }
      return this.unique.equals(that.unique);
    }

    /**
     * Used to write the state of the GrantId instance to disk, when we are
     * persisting the state of the NodeManager
     *
     * @param jsonGenerator The JsonGenerator instance being used to write
     *                      JSON to disk
     * @throws IOException
     */
    public void write(JsonGenerator jsonGenerator) throws IOException {
      jsonGenerator.writeStartObject();
      jsonGenerator.writeObjectField("sessionId", sessionId);
      jsonGenerator.writeObjectField("requestId", requestId);
      jsonGenerator.writeEndObject();
    }
  }

  /**
   * Constructor for a live node.
   *
   * @param clusterNodeInfo Static info reported by the node
   * @param node Topology node for this host
   * @param cpuToResourcePartitioning Mapping of cpu count to resources,
   *        used to derive the per-type max-CPU limits
   */
  public ClusterNode(
      ClusterNodeInfo clusterNodeInfo,
      Node node,
      Map<Integer, Map<ResourceType, Integer>> cpuToResourcePartitioning) {
    // Intern the host string: many nodes share hosts and the name is
    // compared/held frequently.
    clusterNodeInfo.address.host = clusterNodeInfo.address.host.intern();
    this.clusterNodeInfo = clusterNodeInfo;
    this.freeSpecs = clusterNodeInfo.getFree();
    lastHeartbeatTime = ClusterManager.clock.getTime();
    this.hostNode = node;
    resetResourceTypeToStatsMap();
    initResourceTypeToMaxCpuMap(cpuToResourcePartitioning);
  }

  /**
   * Constructor for ClusterNode, used when we are reading back the state
   * from the disk
   *
   * @param coronaSerializer The CoronaSerializer instance being used to read
   *                         the JSON from disk
   * @throws IOException
   */
  ClusterNode(CoronaSerializer coronaSerializer) throws IOException {
    // Initialize the resourceTypeToStatsMap map
    resetResourceTypeToStatsMap();

    // Expecting the START_OBJECT token for ClusterNode
    coronaSerializer.readStartObjectToken("ClusterNode");
    readClusterNodeInfo(coronaSerializer);
    coronaSerializer.readField("grants");
    readGrants(coronaSerializer);
    // Expecting the END_OBJECT token for ClusterNode
    coronaSerializer.readEndObjectToken("ClusterNode");

    // We will initialize the hostNode field later in the
    // restoreClusterNode() method in NodeManager, which is the last stage of
    // restoring the NodeManager state
    hostNode = null;
  }

  /**
   * Reads the clusterNodeInfo object from the JSON stream. Field order must
   * match {@link #write(JsonGenerator)}.
   *
   * @param coronaSerializer The CoronaSerializer instance being used to read
   *                         the JSON from disk
   * @throws IOException
   */
  private void readClusterNodeInfo(CoronaSerializer coronaSerializer)
      throws IOException {
    coronaSerializer.readField("clusterNodeInfo");
    clusterNodeInfo = new ClusterNodeInfo();
    // Expecting the START_OBJECT token for clusterNodeInfo
    coronaSerializer.readStartObjectToken("clusterNodeInfo");

    coronaSerializer.readField("name");
    clusterNodeInfo.name = coronaSerializer.readValueAs(String.class);

    coronaSerializer.readField("address");
    clusterNodeInfo.address = coronaSerializer.readValueAs(InetAddress.class);

    coronaSerializer.readField("total");
    clusterNodeInfo.total = coronaSerializer.readValueAs(ComputeSpecs.class);

    coronaSerializer.readField("free");
    clusterNodeInfo.free = coronaSerializer.readValueAs(ComputeSpecs.class);

    coronaSerializer.readField("resourceInfos");
    clusterNodeInfo.resourceInfos = coronaSerializer.readValueAs(Map.class);

    // Expecting the END_OBJECT token for clusterNodeInfo
    coronaSerializer.readEndObjectToken("clusterNodeInfo");
  }

  /**
   * Reads the list of grants from the JSON stream
   *
   * @param coronaSerializer The CoronaSerializer instance being used to read
   *                         the JSON from disk
   * @throws IOException
   */
  private void readGrants(CoronaSerializer coronaSerializer)
      throws IOException {
    // Expecting the START_OBJECT token for grants
    coronaSerializer.readStartObjectToken("grants");
    JsonToken current = coronaSerializer.nextToken();
    while (current != JsonToken.END_OBJECT) {
      // We can access the key for the grant, but it is not required
      // Expecting the START_OBJECT token for the grant
      coronaSerializer.readStartObjectToken("grant");
      coronaSerializer.readField("grantId");
      GrantId grantId = new GrantId(coronaSerializer);
      coronaSerializer.readField("grant");
      ResourceRequestInfo resourceRequestInfo =
          new ResourceRequestInfo(coronaSerializer);
      // Expecting the END_OBJECT token for the grant
      coronaSerializer.readEndObjectToken("grant");
      // This will update the grants map and the resourceTypeToStatsMap map
      addGrant(grantId.getSessionId(), resourceRequestInfo);
      current = coronaSerializer.nextToken();
    }
  }

  /**
   * Used to write the state of the ClusterNode instance to disk, when we are
   * persisting the state of the NodeManager
   *
   * @param jsonGenerator The JsonGenerator instance being used to write JSON
   *                      to disk
   * @throws IOException
   */
  public void write(JsonGenerator jsonGenerator) throws IOException {
    jsonGenerator.writeStartObject();

    // clusterNodeInfo begins
    jsonGenerator.writeFieldName("clusterNodeInfo");
    jsonGenerator.writeStartObject();
    jsonGenerator.writeStringField("name", clusterNodeInfo.name);
    jsonGenerator.writeObjectField("address", clusterNodeInfo.address);
    jsonGenerator.writeObjectField("total", clusterNodeInfo.total);
    jsonGenerator.writeObjectField("free", clusterNodeInfo.free);
    jsonGenerator.writeObjectField("resourceInfos",
        clusterNodeInfo.resourceInfos);
    jsonGenerator.writeEndObject();
    // clusterNodeInfo ends

    // grants begins
    jsonGenerator.writeFieldName("grants");
    jsonGenerator.writeStartObject();
    for (Map.Entry<GrantId, ResourceRequestInfo> entry : grants.entrySet()) {
      jsonGenerator.writeFieldName(entry.getKey().unique);
      jsonGenerator.writeStartObject();
      jsonGenerator.writeFieldName("grantId");
      entry.getKey().write(jsonGenerator);
      jsonGenerator.writeFieldName("grant");
      entry.getValue().write(jsonGenerator);
      jsonGenerator.writeEndObject();
    }
    jsonGenerator.writeEndObject();
    // grants ends

    jsonGenerator.writeEndObject();
    // We skip the hostNode and lastHeartbeatTime as they need not be
    // persisted. resourceTypeToMaxCpu and resourceTypeToStatsMap can be
    // rebuilt using the conf and the grants respectively.
  }

  /**
   * This method is used to reset the mapping of resource type to stats.
   */
  public void resetResourceTypeToStatsMap() {
    for (ResourceType type : ResourceType.values()) {
      resourceTypeToStatsMap.put(type, new Stats());
    }
  }

  /**
   * This method is used to initialize the resource type to max CPU mapping
   * based upon the cpuToResourcePartitioning instance given
   *
   * @param cpuToResourcePartitioning Mapping of cpus to resources to be used
   */
  public void initResourceTypeToMaxCpuMap(
      Map<Integer, Map<ResourceType, Integer>> cpuToResourcePartitioning) {
    resourceTypeToMaxCpu = getResourceTypeToCountMap(
        (int) clusterNodeInfo.total.numCpus, cpuToResourcePartitioning);
  }

  /**
   * Get a mapping of the resource type to amount of resources for a given
   * number of cpus. Falls back to scaling the 1-cpu entry when there is no
   * exact match for the cpu count.
   *
   * @param numCpus Number of cpus available
   * @param cpuToResourcePartitioning Mapping of number of cpus to resources
   * @return Resources for this amount of cpus
   */
  public static Map<ResourceType, Integer> getResourceTypeToCountMap(
      int numCpus,
      Map<Integer, Map<ResourceType, Integer>> cpuToResourcePartitioning) {
    Map<ResourceType, Integer> ret = cpuToResourcePartitioning.get(numCpus);
    if (ret == null) {
      Map<ResourceType, Integer> oneCpuMap = cpuToResourcePartitioning.get(1);
      if (oneCpuMap == null) {
        throw new RuntimeException(
            "No matching entry for cpu count: " + numCpus +
            " in node and no 1 cpu map");
      }
      ret = new EnumMap<ResourceType, Integer>(ResourceType.class);
      for (Map.Entry<ResourceType, Integer> entry : oneCpuMap.entrySet()) {
        ret.put(entry.getKey(), entry.getValue().intValue() * numCpus);
      }
    }
    return ret;
  }

  /**
   * Fails fast if this node has already been removed from the cluster.
   */
  private void checkDeleted() {
    if (deleted) {
      throw new RuntimeException("Node " + getName() + " has been deleted");
    }
  }

  /**
   * Records a grant on this node and updates the per-type counters.
   *
   * @param sessionId Session receiving the grant
   * @param req The granted resource request
   */
  public void addGrant(String sessionId, ResourceRequestInfo req) {
    checkDeleted();
    grants.put(new GrantId(sessionId, req.getId()), req);
    incrementGrantCount(req.getType());
    // Update allocated counts.
    Utilities.incrComputeSpecs(granted, req.getSpecs());
    Stats stats = resourceTypeToStatsMap.get(req.getType());
    stats.allocatedCpu += req.getSpecs().numCpus;
  }

  /**
   * Looks up the request backing a grant.
   *
   * @param sessionId Session that owns the grant
   * @param requestId Request id of the grant
   * @return The request, or null if no such grant exists
   */
  public ResourceRequestInfo getRequestForGrant(String sessionId,
      int requestId) {
    return grants.get(new GrantId(sessionId, requestId));
  }

  /**
   * Removes a grant from this node and rolls back the per-type counters.
   * A no-op if the grant does not exist.
   *
   * @param sessionId Session that owns the grant
   * @param requestId Request id of the grant
   */
  public void cancelGrant(String sessionId, int requestId) {
    checkDeleted();
    ResourceRequestInfo req =
        grants.remove(new GrantId(sessionId, requestId));
    if (req != null) {
      Utilities.decrComputeSpecs(granted, req.getSpecs());
      Stats stats = resourceTypeToStatsMap.get(req.getType());
      stats.allocatedCpu -= req.getSpecs().numCpus;
      decrementGrantCount(req.getType());
    }
  }

  /**
   * Checks whether this node has enough free cpu and memory to satisfy a
   * request.
   *
   * @param req The resource request to check
   * @param resourceLimit Memory limit policy for this node
   * @return True if the request can be granted on this node
   */
  public boolean checkForGrant(
      ResourceRequest req, ResourceLimit resourceLimit) {
    checkDeleted();
    int cpuAlloced = resourceTypeToStatsMap.get(req.type).allocatedCpu;
    // Use the null-safe accessor: the original dereferenced the boxed
    // Integer directly and would NPE for a type with no max-CPU entry.
    int cpuMax = getMaxCpuForType(req.type);
    boolean enoughCpu = cpuMax >= req.getSpecs().numCpus + cpuAlloced;
    boolean enoughMem = resourceLimit.hasEnoughResource(this);
    return enoughCpu && enoughMem;
  }

  /**
   * Records a heartbeat: refreshes the timestamp and the free specs.
   *
   * @param newClusterNodeInfo Latest info reported by the node
   */
  public void heartbeat(ClusterNodeInfo newClusterNodeInfo) {
    lastHeartbeatTime = ClusterManager.clock.getTime();
    freeSpecs = newClusterNodeInfo.getFree();
  }

  public String getName() {
    return clusterNodeInfo.name;
  }

  public String getHost() {
    return clusterNodeInfo.address.host;
  }

  public InetAddress getAddress() {
    return clusterNodeInfo.address;
  }

  public ClusterNodeInfo getClusterNodeInfo() {
    return clusterNodeInfo;
  }

  public ComputeSpecs getFree() {
    return freeSpecs;
  }

  public ComputeSpecs getTotal() {
    return clusterNodeInfo.getTotal();
  }

  /**
   * @return A snapshot copy of all grant ids on this node
   */
  public Set<GrantId> getGrants() {
    return new HashSet<GrantId>(grants.keySet());
  }

  private void incrementGrantCount(ResourceType type) {
    resourceTypeToStatsMap.get(type).grantCount++;
  }

  private void decrementGrantCount(ResourceType type) {
    resourceTypeToStatsMap.get(type).grantCount--;
  }

  public int getGrantCount(ResourceType type) {
    return resourceTypeToStatsMap.get(type).grantCount;
  }

  /**
   * @param type Resource type to filter by
   * @return A snapshot copy of the grant ids of the given type
   */
  public Set<GrantId> getGrants(ResourceType type) {
    HashSet<GrantId> ret = new HashSet<GrantId>();
    for (Map.Entry<GrantId, ResourceRequestInfo> entry : grants.entrySet()) {
      if (entry.getValue().getType().equals(type)) {
        ret.add(entry.getKey());
      }
    }
    return ret;
  }

  /**
   * @param type Resource type to look up
   * @return Max CPUs for the type, or 0 if the type has no entry
   */
  public int getMaxCpuForType(ResourceType type) {
    Integer i = resourceTypeToMaxCpu.get(type);
    if (i == null) {
      return 0;
    }
    return i.intValue();
  }

  public int getAllocatedCpuForType(ResourceType type) {
    return resourceTypeToStatsMap.get(type).allocatedCpu;
  }
}