/*
* RHQ Management Platform
* Copyright (C) 2005-2008 Red Hat, Inc.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package org.rhq.enterprise.server.cloud;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.persistence.EntityManager;
import javax.persistence.NoResultException;
import javax.persistence.PersistenceContext;
import javax.persistence.Query;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.rhq.core.domain.cloud.AffinityGroup;
import org.rhq.core.domain.cloud.FailoverList;
import org.rhq.core.domain.cloud.FailoverListDetails;
import org.rhq.core.domain.cloud.PartitionEvent;
import org.rhq.core.domain.cloud.PartitionEventDetails;
import org.rhq.core.domain.cloud.Server;
import org.rhq.core.domain.cloud.composite.FailoverListComposite;
import org.rhq.core.domain.cloud.composite.FailoverListComposite.ServerEntry;
import org.rhq.core.domain.cloud.composite.FailoverListDetailsComposite;
import org.rhq.core.domain.resource.Agent;
import org.rhq.enterprise.server.RHQConstants;
import org.rhq.enterprise.server.core.AgentManagerLocal;
/**
* This session beans acts as the single interface with which the distribution algorithm
* will interact. The distribution algorithm runs as a result of various changes in the
* system including but not limited to: newly registering agents, currently connecting
* agents, cloud membership changes (server added/removed), and redistributions according
* to agent load. The result of the distribution algorithm is a single (or a set of)
* {@link FailoverList} objects that are sent down to the connected agents. The agents
* then use these lists to determine which server to fail over to, if their primary server
* is unreachable and/or goes down.
*
* @author Joseph Marques
* @author Jay Shaughnessy
*/
@Stateless
public class FailoverListManagerBean implements FailoverListManagerLocal {
private final Log log = LogFactory.getLog(FailoverListManagerBean.class);
/** The variation in load between most loaded and least loaded server that indicates balanced load. */
private static final double ACCEPTABLE_DISPARITY = 0.10;
@PersistenceContext(unitName = RHQConstants.PERSISTENCE_UNIT_NAME)
private EntityManager entityManager;
@EJB
//@IgnoreDependency
TopologyManagerLocal topologyManager;
@EJB
AgentManagerLocal agentManager;
@EJB
FailoverListManagerLocal failoverListManager;
public FailoverListComposite getExistingForSingleAgent(String agentName) {
Agent agent = agentManager.getAgentByName(agentName);
if (null == agent) {
throw new IllegalArgumentException("No agent found for registration name: " + agentName);
}
return doGetExistingForSingleAgent(agent);
}
private FailoverListComposite doGetExistingForSingleAgent(Agent agent) {
FailoverListComposite result = null;
Query query = entityManager.createNamedQuery(FailoverList.QUERY_GET_VIA_AGENT);
query.setParameter("agent", agent);
try {
FailoverList serverList = (FailoverList) query.getSingleResult();
List<ServerEntry> serverEntries = new ArrayList<ServerEntry>();
for (FailoverListDetails next : serverList.getServerList()) {
serverEntries.add(next.getServer().getServerEntry());
}
result = new FailoverListComposite(serverEntries);
} catch (NoResultException e) {
result = null;
}
return result;
}
public FailoverListComposite getForSingleAgent(PartitionEvent event, String agentName) {
// If a server list already exists then just return it
Agent agent = agentManager.getAgentByName(agentName);
if (null == agent) {
throw new IllegalArgumentException("No agent found for registration name: " + agentName);
}
FailoverListComposite result = doGetExistingForSingleAgent(agent);
if (null == result) {
result = generateServerList(event, agent);
}
return result;
}
private FailoverListComposite generateServerList(PartitionEvent event, Agent agent) {
List<Server> servers = topologyManager.getAllCloudServers();
List<Agent> agents = new ArrayList<Agent>(1);
agents.add(agent);
// get the current agent assignments for the servers
// TODO (jshaughn) Note that "load" in the query name is not true load but rather the count of agents assigned to
// each server (by server list ordinal). This is fine until we decide to introduce relative load values for the
// agents. Even at that this algorithm may be ok for adding new agents and defer to a full repartition to apply
// agent-specific load factors.
Query query = entityManager.createNamedQuery(FailoverListDetails.QUERY_GET_ASSIGNED_LOADS);
@SuppressWarnings("unchecked")
List<FailoverListDetailsComposite> existingLoads = query.getResultList();
Map<Agent, FailoverListComposite> agentServerListMap = getForAgents(event, servers, agents, existingLoads);
persistComposites(event, agentServerListMap);
return (agentServerListMap.get(agent));
}
public Map<Agent, FailoverListComposite> refresh(PartitionEvent event) {
List<Server> servers = topologyManager.getAllCloudServers();
List<Agent> agents = agentManager.getAllAgents();
// persist results immediate, which will be the only writes (as opposed to reads) in this transaction
Map<Agent, FailoverListComposite> agentServerListMap = getForAgents(event, servers, agents, null);
/* now that the intense in-memory manipulation is complete, let's do the stuff that needs to persist the
* results to the database; clear out the existing lists **just** before persisting the new ones to keep
* row lock hold time low
*/
clear(); // clear out the existing server lists because we're going to generate new ones for all agents
persistComposites(event, agentServerListMap);
return agentServerListMap;
}
public Map<Agent, FailoverListComposite> refresh(PartitionEvent event, List<Server> servers, List<Agent> agents) {
// do not persist results immediately, instead return the results and then delete/persist in quick succession
Map<Agent, FailoverListComposite> agentServerListMap = getForAgents(event, servers, agents, null);
/* now that the intense in-memory manipulation is complete, let's do the stuff that needs to persist the
* results to the database; clear out the existing lists **just** before persisting the new ones to keep
* row lock hold time low
*/
for (Agent next : agents) {
// clear out the existing server lists because we're going to generate new ones for all agents
deleteServerListsForAgent(next);
}
persistComposites(event, agentServerListMap);
return agentServerListMap;
}
/*
* NOTE: this method used to persist the agentServerListMap results at the end of processing; however,
* certain callers that performed write operations before calling this method would hold row locks
* too long; so, this method no longer does the persistence, which puts the onus on callers to do so;
* some callers will immediately persist the results, otherwise may want to perform other updates or
* deletions just prior to persistence - the caller now has that option
*/
private Map<Agent, FailoverListComposite> getForAgents(PartitionEvent event, List<Server> servers,
List<Agent> agents, List<FailoverListDetailsComposite> existingLoads) {
Map<Agent, FailoverListComposite> result = new HashMap<Agent, FailoverListComposite>(agents.size());
// create a bucket for each server to which we will assign agents
List<ServerBucket> buckets = new ArrayList<ServerBucket>(servers.size());
for (Server next : servers) {
buckets.add(new ServerBucket(next));
}
// initialize the result map
Map<Agent, List<ServerBucket>> agentServerListMap = new HashMap<Agent, List<ServerBucket>>(agents.size());
for (Agent next : agents) {
agentServerListMap.put(next, new ArrayList<ServerBucket>(servers.size()));
}
// assign server lists level by level: primary, then secondary, then tertiary, etc
for (int level = 0; (level < servers.size()); ++level) {
// Initialize the bucket loads for the next round
initBuckets(buckets, existingLoads, level);
// assign a server for this level to each agent, balancing as we go
// keep track of the how many agents have been assignd on this pass
int agentsAssigned = 0;
// introduce more list disparity by changing the bucket iteration direction on each level
int rotate = (((level % 2) == 0) ? -1 : 1);
for (Agent next : agents) {
List<ServerBucket> serverList = agentServerListMap.get(next);
// When assigning primary (i.e. level 0), supply the current primary as the preferred server.
// This should reduce connection churn by letting most agents stay put (but affects balancing, we'll
// deal with that below)
ServerBucket bestBucket = null;
if ((0 == level) && (null != next.getServer())) {
bestBucket = ServerBucket.getBestBucket(buckets, serverList, next.getAffinityGroup(), next
.getServer().getName());
} else {
bestBucket = ServerBucket.getBestBucket(buckets, serverList, next.getAffinityGroup(), null);
}
// Rotate the list on each iteration. This enhances bucket distribution amongst the levels and ensures
// that we don't starve buckets at the end of the list. Also, we alternate the rotation direction on
// each level which seems to help.
Collections.rotate(buckets, rotate);
// Reverse the buckets completely each time we have assigned an agent to each server. This avoids
// duplicating failover lists completely by not repeating the same server sequence over and over on the
// same level.
if ((++agentsAssigned % buckets.size() == 0)) {
Collections.reverse(buckets);
}
if (null == bestBucket) {
// this should never happen but let's defensively check and log
log.error("Unexpected Condition! null bucket in getForAllAgents()");
continue;
}
serverList.add(bestBucket);
// note that assigned load takes into consideration compute power of the server
bestBucket.assignedLoad += (getAgentLoad(next) / bestBucket.computePower);
bestBucket.assignedAgents.add(next);
}
// For debugging logServerList("Level " + level, agentServerListMap);
// The first pass does a best-effort balancing as it goes but may need further balancing because:
// - the assignment of primary servers tries to retain the current primary server of an existing agent.
// This disrupts the load balancing (but reduces churn).
// - the algorithm is greedy, assigning servers as they are available, this can overload a server near the
// end of assignments (due to, for example, constraints avoiding server duplication in a server list).
// Now, if necessary for load balance, force some agents to new servers.
if (balanceLoad(buckets, agentServerListMap)) {
// for debugging logServerList("Forced Rebalance!", agentServerListMap);
}
}
// generate the result Map
for (Agent next : agentServerListMap.keySet()) {
List<ServerEntry> serverEntries = new ArrayList<ServerEntry>(servers.size());
for (ServerBucket bucket : agentServerListMap.get(next)) {
serverEntries.add(bucket.serverEntry);
}
result.put(next, new FailoverListComposite(serverEntries));
}
return result;
}
private void initBuckets(List<ServerBucket> buckets, List<FailoverListDetailsComposite> existingLoads, int level) {
for (ServerBucket bucket : buckets) {
bucket.assignedLoad = 0.0;
bucket.assignedAgents.clear();
if (null != existingLoads) {
int serverId = bucket.server.getId();
for (FailoverListDetailsComposite existingLoad : existingLoads) {
if ((existingLoad.ordinal == level) && (existingLoad.serverId == serverId)) {
bucket.assignedLoad = (existingLoad.assignedAgentCount / bucket.computePower);
break;
}
}
}
}
}
/**
* Force agents to new servers, if possible, to give us better load balance. A perfect balance is not necessarily
* produced due to:<pre>
* Currently, this algorithm will not break affinity.
* An ACCEPTABLE_DISPARITY between the high server load and low server load is achieved
* no legal swaps are possible.
* <pre>
*/
private boolean balanceLoad(List<ServerBucket> buckets, Map<Agent, List<ServerBucket>> agentServerListMap) {
boolean done = false;
boolean rebalanced = false;
// need at least two buckets to balance
if (buckets.size() < 2)
return false;
do {
// sort buckets from high load to low load
Collections.sort(buckets, new Comparator<ServerBucket>() {
public int compare(ServerBucket bucket1, ServerBucket bucket2) {
return (bucket2.assignedLoad > bucket1.assignedLoad) ? 1 : -1;
}
});
ServerBucket lowBucket = buckets.get(buckets.size() - 1);
// if the load disparity is acceptable then we're done.
if (getLoadDisparity(buckets.get(0).assignedLoad, lowBucket.assignedLoad) < ACCEPTABLE_DISPARITY) {
done = true;
continue;
}
// find an agent to move by traversing the buckets from high to low (excluding lowest bucket)
for (ServerBucket bucket : buckets) {
// if we've looked in all of the buckets and found nothing to move then we're done
if (bucket == lowBucket) {
done = true;
break;
}
AffinityGroup affinityGroup = bucket.server.getAffinityGroup();
boolean checkAffinity = ((null != affinityGroup) && !affinityGroup.equals(lowBucket.server
.getAffinityGroup()));
int highIndex = -1;
double highLoad = 0.0;
double load = 0.0;
for (int i = 0, size = bucket.assignedAgents.size(); (i < size); ++i) {
Agent agent = bucket.assignedAgents.get(i);
// we don't move an agent with satisfied affinity to a bucket that breaks affinity
if (checkAffinity && affinityGroup.equals(agent.getAffinityGroup())) {
continue;
}
// we don't move an agent that is already assigned to lowBucket
if (agentServerListMap.get(agent).contains(lowBucket)) {
continue;
}
load = getAgentLoad(agent);
if (load > highLoad) {
// protect against a move that would send too much load to the lowBucket, effectively just
// reversing the problem and allowing this algorithm to thrash. Don't allow a move that
// increases the lowBucket load higher than the current bucket.
if (!((lowBucket.assignedLoad + load) > (bucket.assignedLoad - load))) {
highIndex = i;
highLoad = load;
}
}
}
// If we found an agent to move then make the move, otherwise look in the next bucket
if (highIndex > -1) {
Agent agent = bucket.assignedAgents.remove(highIndex);
lowBucket.assignedAgents.add(agent);
agentServerListMap.get(agent).remove(bucket);
agentServerListMap.get(agent).add(lowBucket);
lowBucket.assignedLoad += highLoad;
bucket.assignedLoad -= highLoad;
rebalanced = true;
break;
}
}
} while (!done);
return rebalanced;
}
private double getLoadDisparity(Double highLoad, Double lowLoad) {
return ((highLoad - lowLoad) / highLoad);
}
// TODO (jshaughn) figure out how to measure agent load. It should be relative to all other agents, probably normalized such that the average agent
// is load 1.0. All agents must have positive load. If the load needs to be computed here perhaps it should be stored on the AgentServerList
// to avoid recalculation, if it is expensive.
private double getAgentLoad(Agent agent) {
if (null == agent)
return 0.0;
return 1.0;
}
@SuppressWarnings("unused")
private void logServerList(String debugTitle, Map<Agent, List<ServerBucket>> agentServerListMap) {
//if (!log.isInfoEnabled())
// return;
StringBuilder sb = new StringBuilder("\nServerList (");
sb.append(debugTitle);
sb.append(") :");
for (Agent agent : agentServerListMap.keySet()) {
sb.append("\n\n Agent: " + agent.getName());
for (ServerBucket bucket : agentServerListMap.get(agent)) {
sb.append("\n ");
sb.append(bucket.assignedLoad);
sb.append(" : ");
sb.append(bucket.server.getName());
}
}
sb.append("\n\n");
System.out.println(sb.toString());
log.info(sb.toString());
}
public void deleteServerListsForAgent(Agent agent) {
Query query1 = entityManager.createNamedQuery(FailoverListDetails.QUERY_DELETE_VIA_AGENT);
Query query2 = entityManager.createNamedQuery(FailoverList.QUERY_DELETE_VIA_AGENT);
query1.setParameter("agent", agent);
query2.setParameter("agent", agent);
query1.executeUpdate();
query2.executeUpdate();
}
public void deleteServerListDetailsForServer(int serverId) {
Query query = entityManager.createNamedQuery(FailoverListDetails.QUERY_DELETE_VIA_SERVER);
query.setParameter("serverId", serverId);
query.executeUpdate();
}
private void clear() {
Query query = entityManager.createNamedQuery(FailoverListDetails.QUERY_TRUNCATE);
query.executeUpdate();
query = entityManager.createNamedQuery(FailoverList.QUERY_TRUNCATE);
query.executeUpdate();
}
private void persistComposites(PartitionEvent event, Map<Agent, FailoverListComposite> agentServerListMap) {
FailoverList fl = null;
FailoverListDetails failoverListDetails = null;
PartitionEventDetails eventDetails = null;
for (Map.Entry<Agent, FailoverListComposite> next : agentServerListMap.entrySet()) {
Agent nextAgent = next.getKey();
FailoverListComposite nextComposite = next.getValue();
fl = new FailoverList(event, nextAgent);
entityManager.persist(fl);
boolean first = true;
for (int i = 0; i < nextComposite.size(); ++i) {
ServerEntry serverEntry = nextComposite.get(i);
Server server = entityManager.find(Server.class, serverEntry.serverId);
failoverListDetails = new FailoverListDetails(fl, i, server);
entityManager.persist(failoverListDetails);
// event details only shows the current primary server topology
if (first) {
eventDetails = new PartitionEventDetails(event, nextAgent, server);
entityManager.persist(eventDetails);
first = false;
}
}
}
}
}