/*******************************************************************************
*
* Copyright (c) 2012 GigaSpaces Technologies Ltd. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.openspaces.grid.gsm.rebalancing;
import com.gigaspaces.cluster.activeelection.SpaceMode;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.openspaces.admin.Admin;
import org.openspaces.admin.AdminException;
import org.openspaces.admin.GridComponent;
import org.openspaces.admin.gsa.GridServiceAgent;
import org.openspaces.admin.gsa.GridServiceAgents;
import org.openspaces.admin.gsc.GridServiceContainer;
import org.openspaces.admin.internal.admin.InternalAdmin;
import org.openspaces.admin.internal.pu.InternalProcessingUnit;
import org.openspaces.admin.machine.Machine;
import org.openspaces.admin.pu.DeploymentStatus;
import org.openspaces.admin.pu.ProcessingUnit;
import org.openspaces.admin.pu.ProcessingUnitInstance;
import org.openspaces.admin.space.SpaceInstance;
import org.openspaces.core.internal.commons.math.fraction.Fraction;
import org.openspaces.grid.gsm.capacity.CapacityRequirements;
import org.openspaces.grid.gsm.capacity.CapacityRequirementsPerAgent;
import org.openspaces.grid.gsm.capacity.CpuCapacityRequirement;
import org.openspaces.grid.gsm.containers.ContainersSlaUtils;
import org.openspaces.grid.gsm.rebalancing.exceptions.RemovedContainerProcessingUnitDeploymentException;
import org.openspaces.grid.gsm.rebalancing.exceptions.SpaceRecoveryAfterRelocationException;
import org.openspaces.grid.gsm.rebalancing.exceptions.WrongContainerProcessingUnitRelocationException;
import java.io.File;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.logging.Logger;
public class RebalancingUtils {
private static final Log logger = LogFactory.getLog(RebalancingUtils.class);
static Collection<FutureStatelessProcessingUnitInstance> incrementNumberOfStatelessInstancesAsync(
final ProcessingUnit pu,
final GridServiceContainer[] containers,
final Log logger,
final long duration, final TimeUnit timeUnit) {
if (pu.getMaxInstancesPerVM() != 1) {
throw new IllegalArgumentException("Only one instance per VM is allowed");
}
List<GridServiceContainer> unusedContainers = getUnusedContainers(pu,containers);
final Admin admin = pu.getAdmin();
final Map<GridServiceContainer,FutureStatelessProcessingUnitInstance> futureInstances = new HashMap<GridServiceContainer, FutureStatelessProcessingUnitInstance>();
final AtomicInteger targetNumberOfInstances = new AtomicInteger(pu.getNumberOfInstances());
final long start = System.currentTimeMillis();
final long end = start + timeUnit.toMillis(duration);
for (GridServiceContainer container : unusedContainers) {
final GridServiceContainer targetContainer = container;
futureInstances.put(container, new FutureStatelessProcessingUnitInstance() {
AtomicReference<Throwable> throwable = new AtomicReference<Throwable>();
ProcessingUnitInstance newInstance;
public boolean isTimedOut() {
return System.currentTimeMillis() > end;
}
public boolean isDone() {
end();
return isTimedOut() ||
throwable.get() != null ||
newInstance != null;
}
public ProcessingUnitInstance get() throws ExecutionException, IllegalStateException, TimeoutException {
end();
if (getException() != null) {
throw getException();
}
if (newInstance == null) {
if (isTimedOut()) {
throw new TimeoutException("Relocation timeout");
}
throw new IllegalStateException("Async operation is not done yet.");
}
return newInstance;
}
public Date getTimestamp() {
return new Date(start);
}
public ExecutionException getException() {
end();
Throwable t = throwable.get();
if (t != null) {
return new ExecutionException(t.getMessage(), t);
}
return null;
}
public GridServiceContainer getTargetContainer() {
return targetContainer;
}
public ProcessingUnit getProcessingUnit() {
return pu;
}
public String getFailureMessage() throws IllegalStateException {
if (isTimedOut()) {
return "deployment timeout of processing unit " + pu.getName() + " on " +
gscToString(targetContainer);
}
if (getException() != null) {
return getException().getMessage();
}
throw new IllegalStateException("Relocation has not encountered any failure.");
}
private void end() {
if (!targetContainer.isDiscovered()) {
throwable.set(
new RemovedContainerProcessingUnitDeploymentException(
pu,targetContainer));
}
else if (throwable.get() != null || newInstance != null) {
//do nothing. idempotent method
}
else {
incrementInstance();
ProcessingUnitInstance[] instances = targetContainer.getProcessingUnitInstances(pu.getName());
if (instances.length > 0) {
newInstance = instances[0];
}
}
}
private void incrementInstance() {
final String uuid = "[incrementUid:"+UUID.randomUUID().toString()+"] ";
int numberOfInstances = pu.getNumberOfInstances();
int maxNumberOfInstances = getContainersOnMachines(pu).length;
if (numberOfInstances < maxNumberOfInstances) {
if (targetNumberOfInstances.get() == numberOfInstances+1) {
if (logger.isInfoEnabled()) {
logger.info("Waiting for pu.numberOfInstances to increment from "+numberOfInstances + " to " + targetNumberOfInstances.get() + ". "+
"Number of relevant containers " + maxNumberOfInstances);
}
}
else if (admin.getGridServiceManagers().getSize() > 1 && !((InternalProcessingUnit)pu).isBackupGsmInSync() ) {
if (logger.isInfoEnabled()) {
logger.info("Waiting for backup gsm to sync with active gsm");
}
}
else {
targetNumberOfInstances.set(numberOfInstances+1);
if (logger.isInfoEnabled()) {
logger.info(uuid+
" Planning to increment pu.numberOfInstances from "+numberOfInstances + " to " + targetNumberOfInstances.get() + ". "+
"Number of relevant containers " + maxNumberOfInstances);
}
((InternalAdmin) admin).scheduleAdminOperation(new Runnable() {
public void run() {
try {
// this is an async operation
// pu.getNumberOfInstances() still shows the old value.
pu.incrementInstance();
if (logger.isInfoEnabled()) {
logger.info(uuid+" pu.incrementInstance() called");
}
} catch (AdminException e) {
throwable.set(e);
} catch (Throwable e) {
logger.error(uuid+" Unexpected Exception: " + e.getMessage(),e);
throwable.set(e);
}
}
});
}
}
}
});
}
return futureInstances.values();
}
private static List<GridServiceContainer> getUnusedContainers(final ProcessingUnit pu, final GridServiceContainer[] containers) {
// look for free containers
List<GridServiceContainer> unusedContainers = new ArrayList<GridServiceContainer>();
for (GridServiceContainer container : containers) {
if (container.getProcessingUnitInstances(pu.getName()).length == 0) {
unusedContainers.add(container);
}
}
return unusedContainers;
}
public static String puInstancesToString(Collection<ProcessingUnitInstance> instances) {
StringBuilder builder = new StringBuilder();
for (ProcessingUnitInstance instance : instances) {
builder.append(RebalancingUtils.puInstanceToString(instance));
builder.append(File.separator);
}
return builder.toString();
}
static FutureStatefulProcessingUnitInstance relocateProcessingUnitInstanceAsync(
final GridServiceContainer targetContainer,
final ProcessingUnitInstance puInstance,
final Log logger,
final long duration, final TimeUnit timeUnit) {
final ProcessingUnit pu = puInstance.getProcessingUnit();
final GridServiceContainer[] replicationSourceContainers = getReplicationSourceContainers(puInstance);
final int instanceId = puInstance.getInstanceId();
final AtomicReference<Throwable> relocateThrowable = new AtomicReference<Throwable>();
final Admin admin = puInstance.getAdmin();
final int runningNumber = puInstance.getClusterInfo().getRunningNumber();
final String puName = puInstance.getName();
final GridServiceContainer sourceContainer = puInstance.getGridServiceContainer();
final Set<ProcessingUnitInstance> puInstancesFromSamePartition =
getOtherInstancesFromSamePartition(puInstance);
if (logger.isDebugEnabled()) {
logger.debug("Found instances from the same partition as "
+ RebalancingUtils.puInstanceToString(puInstance)
+ " : " + RebalancingUtils.puInstancesToString(puInstancesFromSamePartition));
}
if (puInstancesFromSamePartition.size() != pu.getNumberOfBackups()) {
// total number of instances per partition = numberOfBackups + 1
throw new IllegalStateException(
"puInstancesFromSamePartition has " + puInstancesFromSamePartition.size() +
" instances instead of " + pu.getNumberOfBackups());
}
final long start = System.currentTimeMillis();
final long end = start + timeUnit.toMillis(duration);
((InternalAdmin) admin).scheduleAdminOperation(new Runnable() {
public void run() {
try {
logger.debug("Relocation of " + RebalancingUtils.puInstanceToString(puInstance)
+ " to " + ContainersSlaUtils.gscToString(targetContainer) + " has started.");
puInstance.relocate(targetContainer);
}
catch (AdminException e) {
logger.error("Admin exception " + e.getMessage(),e);
relocateThrowable.set(e);
} catch (Throwable e) {
logger.error("Unexpected exception " + e.getMessage(),e);
relocateThrowable.set(e);
}
}
});
return new FutureStatefulProcessingUnitInstance() {
Throwable throwable;
ProcessingUnitInstance newInstance;
public boolean isTimedOut() {
return System.currentTimeMillis() > end;
}
public boolean isDone() {
endRelocation();
return isTimedOut() ||
throwable != null ||
newInstance != null;
}
public ProcessingUnitInstance get() throws ExecutionException, IllegalStateException, TimeoutException {
endRelocation();
ExecutionException exception = getException();
if (exception != null) {
throw exception;
}
if (newInstance == null) {
if (isTimedOut()) {
throw new TimeoutException("Relocation timeout");
}
throw new IllegalStateException("Async operation is not done yet.");
}
return newInstance;
}
public Date getTimestamp() {
return new Date(start);
}
public ExecutionException getException() {
endRelocation();
if (throwable != null) {
return new ExecutionException(throwable.getMessage(), throwable);
}
return null;
}
/**
* populates this.exception or this.newInstance if relocation is complete
*/
private void endRelocation() {
boolean inProgress = true;
tryStateChange(); // this makes relocation synchronous
if (newInstance != null || throwable != null) {
inProgress = false;
}
if (inProgress) {
if (logger.isDebugEnabled()) {
logger.debug("Relocation from " + ContainersSlaUtils.gscToString(getSourceContainer())
+ " to " + ContainersSlaUtils.gscToString(getTargetContainer()) + " is in progress.");
}
// do nothing. relocate() method running on another thread has not returned yet.
}
}
private void tryStateChange() {
ProcessingUnitInstance relocatedInstance = getRelocatedProcessingUnitInstance();
if (relocatedInstance != null) {
if (relocatedInstance.getGridServiceContainer().equals(targetContainer)) {
if (relocatedInstance.getSpaceInstance() != null &&
relocatedInstance.getSpaceInstance().getMode() != SpaceMode.NONE) {
if (logger.isDebugEnabled()) {
logger.debug("Relocation from " + ContainersSlaUtils.gscToString(getSourceContainer())
+ " to " + ContainersSlaUtils.gscToString(getTargetContainer()) + " had ended successfully.");
}
newInstance = relocatedInstance;
}
}
else {
if (logger.isDebugEnabled()) {
logger.debug("Relocation from " + ContainersSlaUtils.gscToString(getSourceContainer())
+ " to " + ContainersSlaUtils.gscToString(getTargetContainer()) + " has ended with an error.");
}
throwable = new WrongContainerProcessingUnitRelocationException(puInstance,targetContainer);
}
}
}
private ProcessingUnitInstance getRelocatedProcessingUnitInstance() {
for (GridServiceContainer container : admin.getGridServiceContainers()) {
for (ProcessingUnitInstance instance : container.getProcessingUnitInstances(puName)) {
if (!instance.equals(puInstance) &&
instance.getClusterInfo().getRunningNumber() == runningNumber &&
!puInstancesFromSamePartition.contains(instance)) {
return instance;
}
}
}
return null;
}
private boolean isAtLeastOneInstanceValid(Set<ProcessingUnitInstance> instances) {
boolean isValidState = false;
for (ProcessingUnitInstance instance : instances) {
if (instance.isDiscovered() && instance.getGridServiceContainer().isDiscovered()) {
isValidState = true;
break;
}
}
return isValidState;
}
public String getFailureMessage() {
if (isTimedOut()) {
return "relocation timeout of processing unit instance " + instanceId + " from " +
gscToString(sourceContainer) + " to " +
gscToString(targetContainer);
}
if (getException() != null) {
return getException().getMessage();
}
throw new IllegalStateException("Relocation has not encountered any failure.");
}
public GridServiceContainer getTargetContainer() {
return targetContainer;
}
public ProcessingUnit getProcessingUnit() {
return pu;
}
public int getInstanceId() {
return instanceId;
}
public GridServiceContainer getSourceContainer() {
return sourceContainer;
}
public GridServiceContainer[] getReplicaitonSourceContainers() {
return replicationSourceContainers;
}
};
}
/**
* @param instance
* @return list of containers that are used by the relocated processing unit instance to synchronize all data.
*/
public static GridServiceContainer[] getReplicationSourceContainers(ProcessingUnitInstance instance) {
Set<GridServiceContainer> repContainers = new HashSet<GridServiceContainer>();
GridServiceContainer[] containers = instance.getAdmin().getGridServiceContainers().getContainers();
int numberOfBackups = instance.getProcessingUnit().getNumberOfBackups();
if (numberOfBackups == 0) {
return new GridServiceContainer[] {};
}
if (!isProcessingUnitPartitionIntact(instance.getProcessingUnit(), instance.getInstanceId(), containers)) {
throw new IllegalStateException("Cannot relocate pu instance " + puInstanceToString(instance) +" since partition is not intact." );
}
for (int backupId = 0 ; backupId <= numberOfBackups ; backupId++) {
if (backupId != instance.getBackupId()) {
repContainers.add(findProcessingUnitInstance(instance.getProcessingUnit(),instance.getInstanceId(), backupId, containers).getGridServiceContainer());
}
}
return repContainers.toArray(new GridServiceContainer[repContainers.size()]);
}
public static boolean isProcessingUnitIntact(ProcessingUnit pu, GridServiceContainer[] containers) {
boolean intact = true;
if (pu.getStatus() != DeploymentStatus.INTACT) {
intact = false;
}
else {
if (pu.getNumberOfBackups() > 0) {
for (int instanceId = 1 ; intact && instanceId <= pu.getNumberOfInstances() ; instanceId ++) {
if (!isProcessingUnitPartitionIntact(pu, instanceId, containers)) {
intact = false;
break;
}
}
}
else {
ProcessingUnitInstance[] instances = pu.getInstances();
if (instances.length < pu.getNumberOfInstances()) {
intact = false;
}
else {
for (ProcessingUnitInstance instance : instances) {
if (findProcessingUnitInstance(pu, instance.getInstanceId(), 0, containers) == null) {
intact = false;
break;
}
}
}
}
}
return intact;
}
public static boolean isProcessingUnitHasMinimumNumberOfInstancesPerPartition(ProcessingUnit pu, int minimumNumberOfInstancesPerPartition) {
return isProcessingUnitHasMinimumNumberOfInstancesPerPartition(pu, pu.getAdmin().getGridServiceContainers().getContainers(), minimumNumberOfInstancesPerPartition);
}
private static boolean isProcessingUnitHasMinimumNumberOfInstancesPerPartition(ProcessingUnit pu, GridServiceContainer[] containers, int minimumNumberOfInstancesPerPartition) {
boolean hasMinimum = true;
if (pu.getNumberOfBackups() > 0) {
for (int instanceId = 1 ; hasMinimum && instanceId <= pu.getNumberOfInstances() ; instanceId ++) {
if (!isProcessingUnitPartitionHasMinimumNumberOfInstances(pu, instanceId, containers, minimumNumberOfInstancesPerPartition)) {
hasMinimum = false;
break;
}
}
}
else {
hasMinimum = isProcessingUnitIntact(pu,containers);
}
return hasMinimum;
}
public static boolean isProcessingUnitPartitionIntact(ProcessingUnitInstance instance) {
GridServiceContainer[] containers = instance.getAdmin().getGridServiceContainers().getContainers();
return isProcessingUnitPartitionIntact(instance.getProcessingUnit(), instance.getInstanceId(), containers);
}
public static boolean isProcessingUnitIntact(ProcessingUnit pu) {
return isProcessingUnitIntact(pu, pu.getAdmin().getGridServiceContainers().getContainers());
}
private static ProcessingUnitInstance findProcessingUnitInstance(ProcessingUnit pu, int instanceId, int backupId, GridServiceContainer[] containers) {
for (final GridServiceContainer container : containers) {
for (final ProcessingUnitInstance instance : container.getProcessingUnitInstances(pu.getName())) {
if (instance.getInstanceId() == instanceId &&
instance.getBackupId() == backupId){
return instance;
}
}
}
return null;
}
public static boolean isProcessingUnitPartitionIntact(ProcessingUnit pu,int instanceId, GridServiceContainer[] containers) {
return isProcessingUnitPartitionHasMinimumNumberOfInstances(pu, instanceId, containers, 1+pu.getNumberOfBackups());
}
private static boolean isProcessingUnitPartitionHasMinimumNumberOfInstances(ProcessingUnit pu, int instanceId,
GridServiceContainer[] containers, int minimumNumberOfInstancesPerPartition) {
boolean hasMinimum = true;
if (minimumNumberOfInstancesPerPartition >= 1) {
int numberOfPrimaryInstances = 0;
int numberOfBackupInstances = 0;
for (int backupId = 0 ; backupId <= pu.getNumberOfBackups() ; backupId++) {
ProcessingUnitInstance instance = findProcessingUnitInstance(pu, instanceId, backupId, containers);
if (instance != null &&
instance.getSpaceInstance() != null) {
if (instance.getSpaceInstance().getMode() == SpaceMode.BACKUP) {
numberOfBackupInstances++;
}
else if (instance.getSpaceInstance().getMode() == SpaceMode.PRIMARY) {
numberOfPrimaryInstances++;
}
}
}
hasMinimum =
numberOfPrimaryInstances == 1 &&
1+numberOfBackupInstances >= minimumNumberOfInstancesPerPartition;
}
return hasMinimum;
}
/**
* @param instance
* @return all instances from the same partition that is not the specified instance.
*/
public static Set<ProcessingUnitInstance> getOtherInstancesFromSamePartition(ProcessingUnitInstance instance) {
final Set<ProcessingUnitInstance> puInstancesFromSamePartition = new HashSet<ProcessingUnitInstance>();
for (final GridServiceContainer container : instance.getAdmin().getGridServiceContainers()) {
puInstancesFromSamePartition.addAll(getOtherInstancesFromSamePartitionInContainer(container,instance));
}
return puInstancesFromSamePartition;
}
public static Set<ProcessingUnitInstance> getOtherInstancesFromSamePartitionInContainer(
GridServiceContainer container, ProcessingUnitInstance instance) {
Set<ProcessingUnitInstance> puInstancesFromSamePartition = new HashSet<ProcessingUnitInstance>();
for (ProcessingUnitInstance instanceOnContainer : container.getProcessingUnitInstances(instance.getName())) {
if (instanceOnContainer.getInstanceId() == instance.getInstanceId() &&
!instanceOnContainer.equals(instance)) {
puInstancesFromSamePartition.add(instanceOnContainer);
}
}
return puInstancesFromSamePartition;
}
public static Set<ProcessingUnitInstance> getOtherInstancesFromSamePartitionInMachine(
Machine machine, ProcessingUnitInstance puInstance) {
final Set<ProcessingUnitInstance> puInstancesFromSamePartition = new HashSet<ProcessingUnitInstance>();
for (final GridServiceContainer container : machine.getGridServiceContainers()) {
puInstancesFromSamePartition.addAll(getOtherInstancesFromSamePartitionInContainer(container,puInstance));
}
return puInstancesFromSamePartition;
}
public static boolean isEvenlyDistributedAcrossMachines(ProcessingUnit pu, CapacityRequirementsPerAgent aggregatedAllocatedCapacity) {
boolean isEvenlyDistributedAcrossMachines = true;
final Machine[] machines = getMachinesFromAgentUids(pu, aggregatedAllocatedCapacity.getAgentUids());
if (!isProcessingUnitIntact(pu,machines)) {
isEvenlyDistributedAcrossMachines = false;
}
else {
Fraction averageCpuCoresPerPrimaryInstance =
getAverageCpuCoresPerPrimary(pu, aggregatedAllocatedCapacity);
for (Machine source : machines) {
for (Machine target : machines) {
if (target.equals(source)) {
continue;
}
if (isRestartRecommended(pu, source, target, averageCpuCoresPerPrimaryInstance, aggregatedAllocatedCapacity)) {
isEvenlyDistributedAcrossMachines = false;
break;
}
}
}
}
return isEvenlyDistributedAcrossMachines;
}
private static Machine[] getMachinesFromAgentUids(ProcessingUnit pu, Collection<String> agentUids) {
final List<Machine> machines = new ArrayList<Machine>();
final GridServiceAgents gridServiceAgents = pu.getAdmin().getGridServiceAgents();
for (final String agentUid : agentUids) {
final GridServiceAgent agent = gridServiceAgents.getAgentByUID(agentUid);
if (agent == null) {
throw new IllegalStateException("At this point agent " + agentUid + " must be discovered.");
}
machines.add(agent.getMachine());
}
return machines.toArray(new Machine[machines.size()]);
}
public static boolean isRestartRecommended(ProcessingUnit pu, Machine source, Machine target, Fraction optimalCpuCoresPerPrimary, CapacityRequirementsPerAgent allocatedCapacity) {
boolean isRestartRecommended = false;
final int numberOfPrimaryInstancesOnSource = getNumberOfPrimaryInstancesOnMachine(pu, source);
if (numberOfPrimaryInstancesOnSource > 0) {
final int numberOfPrimaryInstancesOnTarget = getNumberOfPrimaryInstancesOnMachine(pu, target);
Fraction cpuCoresOnSource = getNumberOfCpuCores(source, allocatedCapacity);
Fraction cpuCoresOnTarget = getNumberOfCpuCores(target, allocatedCapacity);
final Fraction missingCpuCoresBeforeRestart =
max(Fraction.ZERO,
optimalCpuCoresPerPrimary.multiply(numberOfPrimaryInstancesOnSource)
.subtract(cpuCoresOnSource))
.add(
max(Fraction.ZERO,
optimalCpuCoresPerPrimary.multiply(numberOfPrimaryInstancesOnTarget)
.subtract(cpuCoresOnTarget)));
final Fraction missingCpuCoresAfterRestart =
max(Fraction.ZERO,
optimalCpuCoresPerPrimary.multiply(numberOfPrimaryInstancesOnSource-1)
.subtract(cpuCoresOnSource))
.add(
max(Fraction.ZERO,
optimalCpuCoresPerPrimary.multiply(numberOfPrimaryInstancesOnTarget+1)
.subtract(cpuCoresOnTarget)));
isRestartRecommended = missingCpuCoresAfterRestart.compareTo(missingCpuCoresBeforeRestart) < 0;
}
return isRestartRecommended;
}
private static Fraction max(Fraction a, Fraction b) {
if (b.compareTo(a) > 0) {
return b;
}
return a;
}
/**
* @return true if number of primary instances are evenly distributed across the specified machines
*
public static boolean isEvenlyDistributedAcrossMachines(ProcessingUnit pu, Machine[] machines) {
if (!isProcessingUnitIntact(pu,machines)) {
return false;
}
double averagePrimariesPerCpuCore =
getAverageNumberOfPrimaryInstancesPerCpuCore(pu,machines);
boolean foundMachineWithSurplusPrimaries = false;
boolean foundMachineWithDeficitPrimaries = false;
for (Machine machine : machines) {
if (RebalancingUtils.getAverageNumberOfPrimaryInstancesMinusOnePerCpuCore(pu,machine)
>= averagePrimariesPerCpuCore) {
foundMachineWithSurplusPrimaries = true;
}
else if (RebalancingUtils.getAverageNumberOfPrimaryInstancesPlusOnePerCpuCore(pu,machine)
<= averagePrimariesPerCpuCore) {
foundMachineWithDeficitPrimaries = true;
}
}
return
// everything is balanced
(!foundMachineWithSurplusPrimaries && !foundMachineWithDeficitPrimaries) ||
//not exactly balanced, but there is nothing we can do about it
(!foundMachineWithSurplusPrimaries && foundMachineWithDeficitPrimaries) ||
( foundMachineWithSurplusPrimaries && !foundMachineWithDeficitPrimaries);
}
*/
private static boolean isProcessingUnitIntact(ProcessingUnit pu, Machine[] machines) {
return isProcessingUnitIntact(pu, getContainersOnMachines(pu,machines));
}
/**
* @return all containers that the gsm can deploy the specified pu.
*/
public static GridServiceContainer[] getContainersOnMachines(ProcessingUnit pu) {
return getContainersOnMachines(pu, pu.getAdmin().getMachines().getMachines());
}
private static GridServiceContainer[] getContainersOnMachines(ProcessingUnit pu, Machine[] machines) {
if (pu.getRequiredZones().length != 1) {
throw new IllegalStateException("Processing Unit must have exactly one container zone defined.");
}
final List<GridServiceContainer> containers = new ArrayList<GridServiceContainer>();
for (final Machine machine : machines) {
for (final GridServiceContainer container : machine.getGridServiceContainers()) {
if (container.getZones().size() == 1 &&
container.getZones().containsKey(pu.getRequiredZones()[0])) {
containers.add(container);
}
}
}
return containers.toArray(new GridServiceContainer[containers.size()]);
}
/**
* @return true if number of instances are evenly distributed across the specified containers
*/
public static boolean isEvenlyDistributedAcrossContainers(ProcessingUnit pu, GridServiceContainer[] containers) {
if (!isProcessingUnitIntact(pu,containers)) {
return false;
}
boolean evenlyDistributed = true;
int numberOfInstances = pu.getTotalNumberOfInstances();
int numberOfContainers = containers.length;
if (numberOfInstances < numberOfContainers) {
evenlyDistributed = false;
}
else {
double expectedAverageNumberOfInstancesPerContainer = 1.0 * numberOfInstances / numberOfContainers;
int numberOfServicesPerContainerUpperBound = (int) Math.ceil(expectedAverageNumberOfInstancesPerContainer);
int numberOfServicesPerContainerLowerBound = (int) Math.floor(expectedAverageNumberOfInstancesPerContainer);
for (GridServiceContainer container : containers) {
int puNumberOfInstances = container.getProcessingUnitInstances(pu.getName()).length;
if (puNumberOfInstances < numberOfServicesPerContainerLowerBound ||
puNumberOfInstances > numberOfServicesPerContainerUpperBound) {
evenlyDistributed = false;
break;
}
}
}
return evenlyDistributed;
}
public static Machine[] getMachinesHostingContainers(GridServiceContainer[] containers) {
Set<Machine> machines = new HashSet<Machine>();
for (GridServiceContainer container : containers) {
machines.add(container.getMachine());
}
return machines.toArray(new Machine[machines.size()]);
}
/**
*
* @param container - the container for which planned min number of instances is requested
* @param approvedContainers - the containers approved for deployment for the specified pu
* @param pu - the processing unit
* @return the planned minimum number of instances for the specified container
*/
public static int getPlannedMinimumNumberOfInstancesForContainer(
GridServiceContainer container,
GridServiceContainer[] approvedContainers,
ProcessingUnit pu) {
int min = 0;
if (Arrays.asList(approvedContainers).contains(container)) {
min = (int) Math.floor(getAverageNumberOfInstancesPerContainer(approvedContainers, pu));
}
return min;
}
/**
*
* @param container - the container for which planned min number of instances is requested
* @param approvedContainers - the containers approved for deployment for the specified pu
* @param pu - the processing unit
* @return the planned minimum number of instances for the specified container
*/
public static int getPlannedMaximumNumberOfInstancesForContainer(
GridServiceContainer container,
GridServiceContainer[] approvedContainers,
ProcessingUnit pu) {
int max = 0;
if (Arrays.asList(approvedContainers).contains(container)) {
max = (int) Math.ceil(getAverageNumberOfInstancesPerContainer(approvedContainers, pu));
}
return max;
}
private static double getAverageNumberOfInstancesPerContainer(GridServiceContainer[] approvedContainers, ProcessingUnit pu) {
double avg = ((double) pu.getTotalNumberOfInstances()) / approvedContainers.length;
if (logger.isTraceEnabled()) {
logger.trace("averageInstancesPerContainer = ((double) pu.getTotalNumberOfInstances()) / approvedContainers.length = "
+ ((double) pu.getTotalNumberOfInstances()) + "/" + approvedContainers.length + " = "
+ avg);
}
return avg;
}
/**
* Sorts all of the admin containers based on
* (number of instances from the specified pu - min number of instances)
* If the container is not in the specified approved container list then min=0, meaning
* it will get a higher weight in the sort.
*
*
* @param pu
* @param approvedContainers
* @return the list of sorted containers
* @see RebalancingUtils#getPlannedMinimumNumberOfInstancesForContainer(GridServiceContainer, GridServiceContainer[], ProcessingUnit)
*/
public static List<GridServiceContainer> sortAllContainersByNumberOfInstancesAboveMinimum(
final ProcessingUnit pu, final GridServiceContainer[] approvedContainers) {
final List<GridServiceContainer> sortedContainers =
new ArrayList<GridServiceContainer>(Arrays.asList(
pu.getAdmin().getGridServiceContainers().getContainers()));
Collections.sort(sortedContainers,
new Comparator<GridServiceContainer>() {
public int compare(final GridServiceContainer o1, final GridServiceContainer o2) {
return getNormalizedNumberOfInstances(o1)
- getNormalizedNumberOfInstances(o2);
}
private int getNormalizedNumberOfInstances(final GridServiceContainer container) {
final int numberOfInstances = container.getProcessingUnitInstances(pu.getName()).length;
return numberOfInstances - RebalancingUtils.getPlannedMinimumNumberOfInstancesForContainer(container, approvedContainers, pu);
}
});
return sortedContainers;
}
public static List<Machine> sortMachinesByNumberOfPrimaryInstancesPerCpuCore(
final ProcessingUnit pu,
final Machine[] machines,
final CapacityRequirementsPerAgent allocatedCapacity) {
final List<Machine> sortedMachines =
new ArrayList<Machine>(Arrays.asList(machines));
Collections.sort(sortedMachines,
new Comparator<Machine>() {
public int compare(final Machine m1, final Machine m2) {
if (getNumberOfCpuCores(m1,allocatedCapacity).equals(Fraction.ZERO) ||
getNumberOfCpuCores(m2,allocatedCapacity).equals(Fraction.ZERO)) {
throw new IllegalStateException("Rebalancing assumes positive number of CPU cores per machine");
}
return
getNumberOfPrimaryInstancesPerCpuCore(pu,m1,allocatedCapacity)
.compareTo(
getNumberOfPrimaryInstancesPerCpuCore(pu,m2,allocatedCapacity));
}
});
return sortedMachines;
}
public static Fraction getNumberOfPrimaryInstancesPerCpuCore(ProcessingUnit pu, Machine machine, CapacityRequirementsPerAgent allocatedCapacity) {
return new Fraction(getNumberOfPrimaryInstancesOnMachine(pu, machine))
.divide(getNumberOfCpuCores(machine,allocatedCapacity));
}
public static int getNumberOfPrimaryInstancesOnMachine(ProcessingUnit pu, Machine machine) {
int numberOfPrimaryInstances = 0;
for (GridServiceContainer container : machine.getGridServiceContainers()) {
for (ProcessingUnitInstance instance : container.getProcessingUnitInstances(pu.getName())) {
if (instance.getSpaceInstance() != null &&
instance.getSpaceInstance().getMode() == SpaceMode.PRIMARY) {
numberOfPrimaryInstances++;
}
}
}
return numberOfPrimaryInstances;
}
public static FutureStatefulProcessingUnitInstance restartProcessingUnitInstanceAsync(
ProcessingUnitInstance candidateInstance, Log logger, long timeout, TimeUnit timeUnit) {
return relocateProcessingUnitInstanceAsync(
candidateInstance.getGridServiceContainer(),
candidateInstance,
logger,
timeout, timeUnit);
}
public static Fraction getAverageCpuCoresPerPrimary(
ProcessingUnit pu, CapacityRequirementsPerAgent aggregatedAllocatedCapacity) {
CapacityRequirements totalAllocatedCapacity = aggregatedAllocatedCapacity.getTotalAllocatedCapacity();
if (totalAllocatedCapacity.equalsZero()) {
throw new IllegalStateException("allocated capacity cannot be empty.");
}
return getCpuCores(totalAllocatedCapacity).divide(pu.getNumberOfInstances());
}
private static Fraction getCpuCores(CapacityRequirements totalAllocatedCapacity) {
return totalAllocatedCapacity.getRequirement(new CpuCapacityRequirement().getType()).getCpu();
}
public static Fraction getNumberOfCpuCores(Machine machine, CapacityRequirementsPerAgent allocatedCapacity) {
if (machine.getGridServiceAgents().getSize() != 1) {
throw new IllegalStateException("Machine must have at least one agent");
}
return getCpuCores(allocatedCapacity.getAgentCapacity(machine.getGridServiceAgent().getUid()));
}
public static String puInstanceToString(ProcessingUnitInstance instance) {
StringBuilder builder = new StringBuilder(16);
builder.append("[").append(instance.getInstanceId()).append(",").append(instance.getBackupId() + 1);
SpaceInstance spaceInstance = instance.getSpaceInstance();
if (spaceInstance != null) {
builder.append(",").append(spaceInstance.getMode());
}
builder.append("]");
return builder.toString();
}
public static String machineToString(Machine machine) {
return machine.getHostName() + "/" + machine.getHostAddress();
}
public static String gscToString(GridComponent container) {
return ContainersSlaUtils.gscToString(container);
}
public static String gscsToString(List<GridServiceContainer> containers) {
return ContainersSlaUtils.gscsToString(containers);
}
public static String processingUnitDeploymentToString(ProcessingUnit pu) {
StringBuilder deployment = new StringBuilder();
for (final GridServiceContainer container : pu.getAdmin().getGridServiceContainers()) {
deployment.append(gscToString(container));
deployment.append(" { ");
for (final ProcessingUnitInstance instance : container.getProcessingUnitInstances(pu.getName())) {
deployment.append(puInstanceToString(instance));
deployment.append(" ");
}
deployment.append(" } ");
}
return deployment.toString();
}
}