/*
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license
* agreements. See the NOTICE file distributed with this work for additional information regarding
* copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License. You may obtain a
* copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.geode.internal.cache.partitioned.rebalance;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.logging.log4j.Logger;
import org.apache.geode.cache.partition.PartitionMemberInfo;
import org.apache.geode.distributed.internal.membership.InternalDistributedMember;
import org.apache.geode.internal.Assert;
import org.apache.geode.internal.cache.FixedPartitionAttributesImpl;
import org.apache.geode.internal.cache.GemFireCacheImpl;
import org.apache.geode.internal.cache.PartitionedRegion;
import org.apache.geode.internal.cache.partitioned.InternalPartitionDetails;
import org.apache.geode.internal.cache.partitioned.OfflineMemberDetails;
import org.apache.geode.internal.cache.partitioned.PRLoad;
import org.apache.geode.internal.cache.partitioned.PartitionMemberInfoImpl;
import org.apache.geode.internal.cache.persistence.PersistentMemberID;
import org.apache.geode.internal.i18n.LocalizedStrings;
import org.apache.geode.internal.logging.LogService;
import org.apache.geode.internal.logging.log4j.LocalizedMessage;
/**
* A model of the load on all of the members for a partitioned region. This model is used to find
* the best members to create buckets on or to move buckets or primaries to. All of the actual work
* of creating a copy, moving a primary, etc. is performed by the BucketOperator that is passed to
* the constructor.
*
* To use, create a model and populate it using the addRegion method. addRegion takes a region
* argument to indicate which region the data is for. All of the regions added to a single model
* are assumed to be colocated, and the model adds together the load from each of the individual
* regions to balance all of the regions together.
*
* Rebalancing operations are performed by repeatedly calling model.nextStep until it returns
* false. Each call to nextStep should perform another operation. The model will make callbacks to
* the BucketOperator you provide to the constructor to perform the actual create or move.
*
* While creating redundant copies or moving buckets, this model tries to minimize the standard
* deviation in the weighted loads for the members. The weighted load for a member is the sum of
* the load for all of the buckets on the member divided by that member's weight.
*
* This model is not threadsafe.
*
* @since GemFire 6.0
*
*/
@SuppressWarnings("synthetic-access")
public class PartitionedRegionLoadModel {
/** Logger shared by every instance of this model; it is assigned exactly once, hence final. */
private static final Logger logger = LogService.getLogger();
/**
 * A comparator that is used to sort buckets in the order that we should satisfy redundancy - most
 * needy buckets first.
 *
 * Ordering keys, in priority order: ascending redundancy, then descending load, then ascending
 * bucket id. The id tie-breaker guarantees that two distinct buckets never compare as equal, so
 * a sorted set built on this comparator does not silently drop buckets.
 */
private static final Comparator<Bucket> REDUNDANCY_COMPARATOR = new Comparator<Bucket>() {
  @Override
  public int compare(Bucket o1, Bucket o2) {
    // put the buckets with the lowest redundancy first; Integer.compare avoids the overflow
    // that subtracting two ints can produce
    int result = Integer.compare(o1.getRedundancy(), o2.getRedundancy());
    if (result == 0) {
      // put the bucket with the largest load first. This should give us a
      // better chance of finding a place to put it
      result = Float.compare(o2.getLoad(), o1.getLoad());
    }
    if (result == 0) {
      // finally, just use the id so the comparator doesn't swallow buckets
      // with the same load
      result = Integer.compare(o1.getId(), o2.getId());
    }
    return result;
  }
};
/** Number of bytes in one megabyte; used by toString() to print member sizes in MB. */
private static final long MEGABYTES = 1024 * 1024;
/**
* A member to represent inconsistent data. For example, if two members think they are the primary
* for a bucket, we will set the primary to invalid, so it won't be a candidate for rebalancing.
*/
final MemberRollup INVALID_MEMBER = new MemberRollup(null, false, false);
/**
* One rollup per bucket id (the array index is the bucket id). An entry is null when the bucket
* is not hosted for the most recently added region.
*/
private final BucketRollup[] buckets;
/**
* A map of all members that host this partitioned region
*/
private final Map<InternalDistributedMember, MemberRollup> members =
new HashMap<InternalDistributedMember, MemberRollup>();
/**
* The set of all regions that are colocated in this model.
*/
private final Set<String> allColocatedRegions = new HashSet<String>();
/**
* The sorted set of buckets that have low redundancy, ordered by REDUNDANCY_COMPARATOR. Null
* until initialize() has been called.
*/
private SortedSet<BucketRollup> lowRedundancyBuckets = null;
/**
* The sorted set of buckets whose online redundancy exceeds the required redundancy, ordered by
* REDUNDANCY_COMPARATOR. Null until initialize() has been called (or a set is injected through
* setOverRedundancyBuckets).
*/
private SortedSet<BucketRollup> overRedundancyBuckets = null;
/** Every primary move handed to movePrimary; findBestPrimaryMove never proposes these again. */
private final Collection<Move> attemptedPrimaryMoves = new HashSet<Move>();
/** Every bucket move handed to moveBucket; findBestBucketMove never proposes these again. */
private final Collection<Move> attemptedBucketMoves = new HashSet<Move>();
/** Bucket creations whose operator callback reported failure; skipped by findBestTarget. */
private final Collection<Move> attemptedBucketCreations = new HashSet<Move>();
/** Bucket removals the operator refused; skipped by findBestRemove. */
private final Collection<Move> attemptedBucketRemoves = new HashSet<Move>();
/** Performs the real create/move/remove operations that this model decides on. */
private final BucketOperator operator;
/** The redundancy level buckets are expected to reach. */
private final int requiredRedundancy;
/** The average primary load on a member */
private float primaryAverage = -1;
/** The average bucket load on a member */
private float averageLoad = -1;
/**
* The minimum improvement in variance that we'll consider worth moving a primary
*/
private double minPrimaryImprovement = -1;
/**
* The minimum improvement in variance that we'll consider worth moving a bucket
*/
private double minImprovement = -1;
/** Consulted by enforceUniqueZones(). */
private final AddressComparor addressComparor;
/** Members flagged as critical when their details are added in addRegion. */
private final Set<InternalDistributedMember> criticalMembers;
/** The region this model was created for; see the TODO in getPartitionedRegion(). */
private final PartitionedRegion partitionedRegion;
/**
 * Builds an empty load model. Members and buckets are supplied afterwards, one region at a time,
 * through {@link #addRegion(String, Collection, OfflineMemberDetails, boolean)}.
 *
 * @param operator the operator which performs the actual creates/moves for buckets
 * @param redundancyLevel The expected redundancy level for the region
 * @param numBuckets the size of the bucket rollup array allocated by this model
 * @param addressComparor the comparator consulted by {@link #enforceUniqueZones()}
 * @param criticalMembers the members that are flagged as critical when they are added
 * @param region the partitioned region returned by {@link #getPartitionedRegion()}
 */
public PartitionedRegionLoadModel(BucketOperator operator, int redundancyLevel, int numBuckets,
    AddressComparor addressComparor, Set<InternalDistributedMember> criticalMembers,
    PartitionedRegion region) {
  // collaborators first
  this.partitionedRegion = region;
  this.criticalMembers = criticalMembers;
  this.addressComparor = addressComparor;
  this.operator = operator;
  // then the sizing information
  this.requiredRedundancy = redundancyLevel;
  this.buckets = new BucketRollup[numBuckets];
}
/**
* Add a region to the model. All regions that are added are assumed to be colocated. The first
* region added to the model should be the parent region. The parent region is expected to have at
* least as many members as child regions; it may have more. If the parent has more members than
* child regions those members will be considered invalid.
*
* The method works in four passes: (1) build per-region Member and Bucket objects from the
* supplied details, (2) fold each region member into the per-member rollup, (3) fold each region
* bucket into the per-bucket rollup, and (4) drop any member rollup that does not host every
* region added so far.
*
* @param region the name of the region being added; it is recorded in the set of colocated
*        regions and used as the key for the colocated members and buckets
* @param memberDetailSet A set of details about each member.
* @param offlineDetails queried once per hosted bucket for the offline members of that bucket
* @param enforceLocalMaxMemory passed through to every Member and MemberRollup created here
*/
public void addRegion(String region,
Collection<? extends InternalPartitionDetails> memberDetailSet,
OfflineMemberDetails offlineDetails, boolean enforceLocalMaxMemory) {
this.allColocatedRegions.add(region);
// build up a list of members and an array of buckets for this
// region. Each bucket has a reference to all of the members
// that host it and each member has a reference to all of the buckets
// it hosts
Map<InternalDistributedMember, Member> regionMember =
new HashMap<InternalDistributedMember, Member>();
Bucket[] regionBuckets = new Bucket[this.buckets.length];
for (InternalPartitionDetails memberDetails : memberDetailSet) {
InternalDistributedMember memberId =
(InternalDistributedMember) memberDetails.getDistributedMember();
boolean isCritical = criticalMembers.contains(memberId);
Member member = new Member(memberId, memberDetails.getPRLoad().getWeight(),
memberDetails.getConfiguredMaxMemory(), isCritical, enforceLocalMaxMemory);
regionMember.put(memberId, member);
PRLoad load = memberDetails.getPRLoad();
for (int i = 0; i < regionBuckets.length; i++) {
// a positive read load is what marks this member as hosting bucket i
if (load.getReadLoad(i) > 0) {
Bucket bucket = regionBuckets[i];
if (bucket == null) {
// first member seen for this bucket: its read load and size seed the Bucket
Set<PersistentMemberID> offlineMembers = offlineDetails.getOfflineMembers(i);
bucket =
new Bucket(i, load.getReadLoad(i), memberDetails.getBucketSize(i), offlineMembers);
regionBuckets[i] = bucket;
}
bucket.addMember(member);
// a positive write load is what marks this member as claiming to be the primary
if (load.getWriteLoad(i) > 0) {
if (bucket.getPrimary() == null) {
bucket.setPrimary(member, load.getWriteLoad(i));
} else if (!bucket.getPrimary().equals(member)) {
// two different members claim the primary: mark the primary as invalid
bucket.setPrimary(INVALID_MEMBER, 1);
}
}
}
}
}
// add each member for this region to a rollup of all colocated
// regions
for (Member member : regionMember.values()) {
InternalDistributedMember memberId = member.getDistributedMember();
MemberRollup memberSum = this.members.get(memberId);
boolean isCritical = criticalMembers.contains(memberId);
if (memberSum == null) {
memberSum = new MemberRollup(memberId, isCritical, enforceLocalMaxMemory);
this.members.put(memberId, memberSum);
}
memberSum.addColocatedMember(region, member);
}
// Now, add the region to the rollups of the colocated
// regions and buckets
for (int i = 0; i < this.buckets.length; i++) {
if (regionBuckets[i] == null) {
// do nothing, this bucket is not hosted for this region.
// [sumedh] remove from buckets array too to be consistent since
// this method will be invoked repeatedly for all colocated regions,
// and then we may miss some colocated regions for a bucket leading
// to all kinds of issues later
this.buckets[i] = null;
continue;
}
if (this.buckets[i] == null) {
// If this is the first region we have seen that is hosting this bucket, create a bucket
// rollup
this.buckets[i] = new BucketRollup(i);
}
// Add all of the members hosting the bucket to the rollup
for (Member member : regionBuckets[i].getMembersHosting()) {
InternalDistributedMember memberId = member.getDistributedMember();
this.buckets[i].addMember(this.members.get(memberId));
}
// set the primary for the rollup
if (regionBuckets[i].getPrimary() != null) {
if (this.buckets[i].getPrimary() == null) {
InternalDistributedMember memberId = regionBuckets[i].getPrimary().getDistributedMember();
this.buckets[i].setPrimary(this.members.get(memberId), 0);
} else {
// the rollup already has a primary: it must agree with this region's primary,
// unless it has already been marked invalid
if (!(this.buckets[i].getPrimary() == INVALID_MEMBER)) {
if (!this.buckets[i].getPrimary().getDistributedMember()
.equals(regionBuckets[i].getPrimary().getDistributedMember())) {
if (logger.isDebugEnabled()) {
logger.debug(
"PartitionedRegionLoadModel - Setting bucket {} to INVALID because it is the primary on two members.This could just be a race in the collocation of data. member1={} member2={}",
this.buckets[i], this.buckets[i].getPrimary(), regionBuckets[i].getPrimary());
}
this.buckets[i].setPrimary(INVALID_MEMBER, 0);
}
}
}
}
this.buckets[i].addColocatedBucket(region, regionBuckets[i]);
}
// TODO rebalance - there is a possibility of adding members
// back here, which I don't like. I think maybe all of the regions should be in the
// constructor for the load model, and then when the constructor is done
// we can do with validation.
// If any members don't have this new region, remove them.
for (Iterator<Entry<InternalDistributedMember, MemberRollup>> itr =
members.entrySet().iterator(); itr.hasNext();) {
MemberRollup memberRollup = itr.next().getValue();
if (!memberRollup.getColocatedMembers().keySet().equals(this.allColocatedRegions)) {
// removal goes through the iterator so the map can be modified while iterating
itr.remove();
if (logger.isDebugEnabled()) {
logger.debug(
"PartitionedRegionLoadModel - removing member {} from the consideration because it doesn't have all of the colocated regions. Expected={}, was={}",
memberRollup, allColocatedRegions, memberRollup.getColocatedMembers());
}
// This state should never happen
if (!memberRollup.getBuckets().isEmpty()) {
logger.warn(LocalizedMessage.create(
LocalizedStrings.PartitionedRegionLoadModel_INCOMPLETE_COLOCATION,
new Object[] {memberRollup, this.allColocatedRegions,
memberRollup.getColocatedMembers().keySet(), memberRollup.getBuckets()}));
}
// iterate over a copy of the member's buckets while detaching the member from each of them
for (Bucket bucket : new HashSet<Bucket>(memberRollup.getBuckets())) {
bucket.removeMember(memberRollup);
}
}
}
}
/**
 * Prepares the model for use: clears the cached averages and minimum improvements, then builds
 * the over-redundancy and low-redundancy bucket sets from the current bucket rollups.
 */
public void initialize() {
  this.resetAverages();
  this.initOverRedundancyBuckets();
  this.initLowRedundancyBuckets();
}
/**
 * Returns the model's own sorted set of low-redundancy buckets (not a copy). The set is
 * {@code null} until {@link #initialize()} has been called.
 */
public SortedSet<BucketRollup> getLowRedundancyBuckets() {
  return this.lowRedundancyBuckets;
}
/**
 * Returns the model's own sorted set of over-redundancy buckets (not a copy). The set is
 * {@code null} until it has been built by {@link #initialize()} or injected through
 * {@link #setOverRedundancyBuckets(SortedSet)}.
 */
public SortedSet<BucketRollup> getOverRedundancyBuckets() {
  return this.overRedundancyBuckets;
}
/**
 * Replaces the set of over-redundancy buckets tracked by this model. The given set is stored as
 * is, without copying.
 *
 * @param overRedundancyBuckets the set to use from now on
 */
public void setOverRedundancyBuckets(SortedSet<BucketRollup> overRedundancyBuckets) {
  SortedSet<BucketRollup> replacement = overRedundancyBuckets;
  this.overRedundancyBuckets = replacement;
}
/**
 * Delegates to the address comparor supplied at construction time.
 *
 * @return whatever the address comparor reports for {@code enforceUniqueZones()}
 */
public boolean enforceUniqueZones() {
  return this.addressComparor.enforceUniqueZones();
}
/**
 * Drops the given bucket from the low-redundancy set so it is no longer offered as a candidate.
 *
 * @param first the bucket to stop considering
 */
public void ignoreLowRedundancyBucket(BucketRollup first) {
  SortedSet<BucketRollup> candidates = this.lowRedundancyBuckets;
  candidates.remove(first);
}
/**
 * Drops the given bucket from the over-redundancy set so it is no longer offered as a candidate.
 *
 * @param first the bucket to stop considering
 */
public void ignoreOverRedundancyBucket(BucketRollup first) {
  SortedSet<BucketRollup> candidates = this.overRedundancyBuckets;
  candidates.remove(first);
}
/**
 * Looks up the rollup for a member.
 *
 * @param target the id of the member to look up
 * @return the rollup stored for that member, or {@code null} if the model has none
 */
public MemberRollup getMember(InternalDistributedMember target) {
  return this.members.get(target);
}
/**
 * Returns the model's own bucket rollup array (not a copy). The array index is the bucket id and
 * entries may be {@code null}.
 */
public BucketRollup[] getBuckets() {
  return this.buckets;
}
/**
 * @return the full path of the partitioned region this model was created for
 */
public String getName() {
  PartitionedRegion region = getPartitionedRegion();
  return region.getFullPath();
}
/**
 * @return the partitioned region that was passed to the constructor
 */
public PartitionedRegion getPartitionedRegion() {
  // TODO - this model really should not have
  // a reference to the partitioned region object.
  // The fixed PR code currently depends on this
  // partitioned region object and needs
  // refactoring.
  return this.partitionedRegion;
}
/**
 * Collects the size in bytes of every colocated bucket that makes up the given rollup.
 *
 * @param bucket the bucket rollup to inspect
 * @return a new map from region name to the byte size of that region's bucket
 */
private Map<String, Long> getColocatedRegionSizes(BucketRollup bucket) {
  Map<String, Long> sizesByRegion = new HashMap<String, Long>();
  for (Map.Entry<String, Bucket> colocated : bucket.getColocatedBuckets().entrySet()) {
    String regionName = colocated.getKey();
    Bucket regionBucket = colocated.getValue();
    sizesByRegion.put(regionName, Long.valueOf(regionBucket.getBytes()));
  }
  return sizesByRegion;
}
/**
 * Trigger the creation of a redundant copy of a bucket on the given member, potentially
 * asynchronously.
 *
 * The model is updated optimistically before the bucket operator is invoked: the target member
 * is added to the bucket and the cached averages are cleared. Because the bucket operator may be
 * asynchronous, the bucket may not be created immediately. If the operator later reports a
 * failure, the optimistic update is rolled back and the attempt is remembered so it is not
 * proposed again. Invoke {@link #waitForOperations()} to wait for those operations to actually
 * complete.
 *
 * @param bucket the bucket that needs another copy
 * @param targetMember the member that should host the new copy
 */
public void createRedundantBucket(final BucketRollup bucket, final Member targetMember) {
  final Map<String, Long> sizesByRegion = getColocatedRegionSizes(bucket);
  final Move attemptedCreate = new Move(null, targetMember, bucket);

  // The low-redundancy set is sorted by redundancy, so the bucket has to leave the set before
  // its membership changes and may only re-enter afterwards.
  this.lowRedundancyBuckets.remove(bucket);
  bucket.addMember(targetMember);
  boolean stillNeedsCopies = bucket.getRedundancy() < this.requiredRedundancy;
  if (stillNeedsCopies) {
    this.lowRedundancyBuckets.add(bucket);
  }
  resetAverages();

  BucketOperator.Completion rollbackOnFailure = new BucketOperator.Completion() {
    @Override
    public void onSuccess() {}

    @Override
    public void onFailure() {
      // If the bucket creation failed, we need to undo the changes
      // we made to the model
      attemptedBucketCreations.add(attemptedCreate);
      // remove the bucket from lowRedundancyBuckets before mutating the state
      lowRedundancyBuckets.remove(bucket);
      bucket.removeMember(targetMember);
      if (bucket.getRedundancy() < requiredRedundancy) {
        lowRedundancyBuckets.add(bucket);
      }
      resetAverages();
    }
  };
  this.operator.createRedundantBucket(targetMember.getMemberId(), bucket.getId(), sizesByRegion,
      rollbackOnFailure);
}
/**
 * Asks the bucket operator to remove a surplus copy of a bucket from the given member.
 *
 * If the operator refuses, the attempt is recorded so it is not proposed again and the model is
 * left untouched. Otherwise the member is detached from the bucket, the bucket is re-queued if
 * its online redundancy is still above the required level, and the cached averages are cleared.
 *
 * @param bucket the bucket that has too many copies
 * @param targetMember the member to remove the copy from
 */
protected void remoteOverRedundancyBucket(BucketRollup bucket, Member targetMember) {
  Move removal = new Move(null, targetMember, bucket);
  Map<String, Long> sizesByRegion = getColocatedRegionSizes(bucket);
  boolean removed =
      this.operator.removeBucket(targetMember.getMemberId(), bucket.getId(), sizesByRegion);
  if (!removed) {
    this.attemptedBucketRemoves.add(removal);
    return;
  }

  // take the bucket out of the sorted set before changing its membership
  this.overRedundancyBuckets.remove(bucket);
  bucket.removeMember(targetMember);
  // put the bucket back into the list if we still need to satisfy redundancy for
  // this bucket
  boolean stillOverRedundant = bucket.getOnlineRedundancy() > this.requiredRedundancy;
  if (stillOverRedundant) {
    this.overRedundancyBuckets.add(bucket);
  }
  resetAverages();
}
/**
 * Rebuilds the low-redundancy set from scratch. A bucket qualifies when its redundancy is not
 * negative and is below the required redundancy; null slots in the bucket array are skipped.
 */
private void initLowRedundancyBuckets() {
  SortedSet<BucketRollup> needy = new TreeSet<BucketRollup>(REDUNDANCY_COMPARATOR);
  this.lowRedundancyBuckets = needy;
  for (BucketRollup candidate : this.buckets) {
    if (candidate == null) {
      continue;
    }
    if (candidate.getRedundancy() >= 0 && candidate.getRedundancy() < this.requiredRedundancy) {
      needy.add(candidate);
    }
  }
}
/**
 * Rebuilds the over-redundancy set from scratch. A bucket qualifies when its online redundancy
 * is above the required redundancy; null slots in the bucket array are skipped.
 */
private void initOverRedundancyBuckets() {
  SortedSet<BucketRollup> surplus = new TreeSet<BucketRollup>(REDUNDANCY_COMPARATOR);
  this.overRedundancyBuckets = surplus;
  for (BucketRollup candidate : this.buckets) {
    if (candidate == null) {
      continue;
    }
    if (candidate.getOnlineRedundancy() > this.requiredRedundancy) {
      surplus.add(candidate);
    }
  }
}
/**
 * Find the best member to put a new bucket on.
 *
 * Among the members willing to accept the bucket, the winner is the one whose weighted load
 * after receiving the bucket would be lowest. Creations that already failed are never proposed.
 *
 * @param bucket the bucket we want to create
 * @param checkIPAddress true if we should only consider members that do not have the same IP
 *        Address as a member that already hosts the bucket
 * @return the cheapest creation, or {@code null} if no member qualifies
 */
public Move findBestTarget(Bucket bucket, boolean checkIPAddress) {
  Move cheapest = null;
  float cheapestCost = Float.MAX_VALUE;
  for (Member candidate : this.members.values()) {
    if (!candidate.willAcceptBucket(bucket, null, checkIPAddress).willAccept()) {
      continue;
    }
    float costAfterCreate = (candidate.getTotalLoad() + bucket.getLoad()) / candidate.getWeight();
    if (!(costAfterCreate < cheapestCost)) {
      continue;
    }
    Move creation = new Move(null, candidate, bucket);
    if (this.attemptedBucketCreations.contains(creation)) {
      // this exact creation already failed once
      continue;
    }
    cheapestCost = costAfterCreate;
    cheapest = creation;
  }
  return cheapest;
}
/**
 * Find the best member to remove a bucket from.
 *
 * Among the members hosting the bucket, excluding its primary, the winner is the one whose
 * weighted load would still be the highest after the bucket is removed. Removals that the
 * operator already refused are never proposed again.
 *
 * @param bucket the bucket we want to remove a copy of
 * @return the preferred removal, or {@code null} if no member qualifies
 */
public Move findBestRemove(Bucket bucket) {
  // Float.MIN_VALUE is the smallest POSITIVE float, so using it as the starting point would
  // exclude every member whose load after the removal is zero or less. Start from negative
  // infinity so that any real load qualifies.
  float mostLoaded = Float.NEGATIVE_INFINITY;
  Move bestMove = null;
  for (Member member : bucket.getMembersHosting()) {
    float newLoad = (member.getTotalLoad() - bucket.getLoad()) / member.getWeight();
    if (newLoad > mostLoaded && !member.equals(bucket.getPrimary())) {
      Move move = new Move(null, member, bucket);
      if (!this.attemptedBucketRemoves.contains(move)) {
        mostLoaded = newLoad;
        bestMove = move;
      }
    }
  }
  return bestMove;
}
/**
 * Finds the creation target for a bucket of a fixed partitioned region. The only candidate is
 * the member identified by this region's distribution manager, and only when one of the region's
 * fixed partition attributes contains the bucket.
 *
 * @param bucket the bucket we want to create
 * @param checkIPAddress forwarded to the member's acceptance check
 * @return a creation on that member, or {@code null} if the region has no fixed partition
 *         attributes, none of them contains the bucket, the member is not part of the model, or
 *         the member refuses the bucket
 */
public Move findBestTargetForFPR(Bucket bucket, boolean checkIPAddress) {
  List<FixedPartitionAttributesImpl> fixedPartitions =
      this.partitionedRegion.getFixedPartitionAttributesImpl();
  if (fixedPartitions == null) {
    return null;
  }
  for (FixedPartitionAttributesImpl fixedPartition : fixedPartitions) {
    if (!fixedPartition.hasBucket(bucket.getId())) {
      continue;
    }
    InternalDistributedMember localId =
        this.partitionedRegion.getDistributionManager().getDistributionManagerId();
    if (!this.members.containsKey(localId)) {
      continue;
    }
    Member localMember = this.members.get(localId);
    if (localMember.willAcceptBucket(bucket, null, checkIPAddress).willAccept()) {
      // We should have just one move for creating
      // all the buckets for a FPR on this node.
      return new Move(null, localMember, bucket);
    }
  }
  return null;
}
/**
 * Asks the bucket operator to move a primary and, on success, records the new primary in the
 * model. The move is remembered as attempted whether or not it succeeded; attempting the same
 * move twice trips an assertion.
 *
 * @param bestMove the primary move to perform
 * @return true if the operator reported that the primary was moved
 */
protected boolean movePrimary(Move bestMove) {
  Member from = bestMove.getSource();
  Member to = bestMove.getTarget();
  Bucket bucket = bestMove.getBucket();

  boolean moved = this.operator.movePrimary(from.getDistributedMember(),
      to.getDistributedMember(), bucket.getId());
  if (moved) {
    bucket.setPrimary(to, bucket.getPrimaryLoad());
  }

  boolean firstAttempt = this.attemptedPrimaryMoves.add(bestMove);
  Assert.assertTrue(firstAttempt,
      "PartitionedRegionLoadModel.movePrimarys - excluded set is not growing, so we probably would have an infinite loop here");
  return moved;
}
/**
 * Searches every (source member, primary bucket, hosting member) combination for the primary
 * move that reduces the variance of the weighted primary load the most.
 *
 * A move only qualifies when its improvement beats both the best one seen so far and the minimum
 * primary improvement, and when it has not been attempted before.
 *
 * @return the best primary move, or {@code null} if none qualifies
 */
public Move findBestPrimaryMove() {
  double largestGain = 0;
  Move winner = null;
  for (Member from : this.members.values()) {
    for (Bucket primaryBucket : from.getPrimaryBuckets()) {
      for (Member to : primaryBucket.getMembersHosting()) {
        if (from.equals(to)) {
          continue;
        }
        double gain = improvement(from.getPrimaryLoad(), from.getWeight(), to.getPrimaryLoad(),
            to.getWeight(), primaryBucket.getPrimaryLoad(), getPrimaryAverage());
        if (gain > largestGain) {
          if (gain > getMinPrimaryImprovement()) {
            Move candidate = new Move(from, to, primaryBucket);
            if (!this.attemptedPrimaryMoves.contains(candidate)) {
              largestGain = gain;
              winner = candidate;
            }
          }
        }
      }
    }
  }
  return winner;
}
/**
 * Move the primary of every bucket that belongs to a primary fixed partition of this region from
 * whichever member currently holds it to the member identified by this region's distribution
 * manager.
 *
 * Nothing is done when the region has no fixed partition attributes. A move that the bucket
 * operator reports as failed trips an assertion.
 */
private void makeFPRPrimaryForThisNode() {
  List<FixedPartitionAttributesImpl> fixedPartitions =
      this.partitionedRegion.getFixedPartitionAttributesImpl();
  if (fixedPartitions == null) {
    // same guard as findBestTargetForFPR: the attributes may be absent
    return;
  }
  InternalDistributedMember targetId = this.partitionedRegion.getDistributionManager().getId();
  Member target = this.members.get(targetId);
  for (Bucket bucket : this.buckets) {
    if (bucket == null) {
      continue;
    }
    for (FixedPartitionAttributesImpl fpa : fixedPartitions) {
      if (!(fpa.hasBucket(bucket.getId()) && fpa.isPrimary())) {
        continue;
      }
      Member source = bucket.getPrimary();
      if (source == target) {
        // the primary is already where it should be
        continue;
      }
      // HACK: In case we don't know who is Primary at this time
      // we just set source as target too for stat purposes
      InternalDistributedMember srcDM = (source == null || source == INVALID_MEMBER)
          ? target.getDistributedMember() : source.getDistributedMember();
      if (logger.isDebugEnabled()) {
        logger.debug(
            "PRLM#movePrimariesForFPR: For Bucket#{}, moving primary from source {} to target {}",
            bucket.getId(), source, target);
      }
      boolean successfulMove =
          this.operator.movePrimary(srcDM, target.getDistributedMember(), bucket.getId());
      // We have to move the primary otherwise there is some problem!
      Assert.assertTrue(successfulMove,
          " Fixed partitioned region not able to move the primary!");
      if (successfulMove) {
        if (logger.isDebugEnabled()) {
          logger.debug(
              "PRLM#movePrimariesForFPR: For Bucket#{}, moving primary source {} to target {}",
              bucket.getId(), source, target);
        }
        bucket.setPrimary(target, bucket.getPrimaryLoad());
      }
    }
  }
}
/**
 * Calculate the target weighted primary load on each node: the sum of the primary load of all
 * members divided by the sum of their weights. The result is cached until the averages are
 * reset; a cached value of -1 means "not computed yet".
 */
private float getPrimaryAverage() {
  if (this.primaryAverage != -1) {
    return this.primaryAverage;
  }
  float primaryLoadSum = 0;
  float weightSum = 0;
  for (Member each : this.members.values()) {
    primaryLoadSum += each.getPrimaryLoad();
    weightSum += each.getWeight();
  }
  this.primaryAverage = primaryLoadSum / weightSum;
  return this.primaryAverage;
}
/**
 * Calculate the target weighted amount of data on each node: the sum of the total load of all
 * members divided by the sum of their weights. The result is cached until the averages are
 * reset; a cached value of -1 means "not computed yet".
 */
private float getAverageLoad() {
  if (this.averageLoad != -1) {
    return this.averageLoad;
  }
  float loadSum = 0;
  float weightSum = 0;
  for (Member each : this.members.values()) {
    loadSum += each.getTotalLoad();
    weightSum += each.getWeight();
  }
  this.averageLoad = loadSum / weightSum;
  return this.averageLoad;
}
/**
 * Calculate the minimum improvement in primary variance that we consider worthwhile. It is the
 * change in variance seen by a member with the largest weight when it goes from holding the
 * average primary load plus the smallest primary bucket to holding exactly the average, divided
 * by the size of that bucket. The result is cached until the averages are reset.
 */
private double getMinPrimaryImprovement() {
  boolean notComputedYet = (this.minPrimaryImprovement + 1.0) < .0000001; // i.e. == -1
  if (!notComputedYet) {
    return this.minPrimaryImprovement;
  }

  float heaviestWeight = 0;
  float smallestPrimary = 0; // 0 doubles as "nothing found yet"
  for (Member each : this.members.values()) {
    if (each.getWeight() > heaviestWeight) {
      heaviestWeight = each.getWeight();
    }
    for (Bucket primaryBucket : each.getPrimaryBuckets()) {
      if (primaryBucket.getPrimaryLoad() < smallestPrimary || smallestPrimary == 0) {
        smallestPrimary = primaryBucket.getPrimaryLoad();
      }
    }
  }

  double varianceWithBucket = variance(getPrimaryAverage() * heaviestWeight + smallestPrimary,
      heaviestWeight, getPrimaryAverage());
  double varianceWithoutBucket =
      variance(getPrimaryAverage() * heaviestWeight, heaviestWeight, getPrimaryAverage());
  this.minPrimaryImprovement = (varianceWithBucket - varianceWithoutBucket) / smallestPrimary;
  return this.minPrimaryImprovement;
}
/**
 * Calculate the minimum improvement in load variance that we consider worthwhile. It is the
 * change in variance seen by a member with the largest weight when it goes from holding the
 * average load plus the smallest bucket to holding exactly the average, divided by the load of
 * that bucket. The result is cached until the averages are reset.
 */
private double getMinImprovement() {
  boolean notComputedYet = (this.minImprovement + 1.0) < .0000001; // i.e. == -1
  if (!notComputedYet) {
    return this.minImprovement;
  }

  float heaviestWeight = 0;
  float smallestLoad = 0; // 0 doubles as "nothing found yet"
  for (Member each : this.members.values()) {
    if (each.getWeight() > heaviestWeight) {
      heaviestWeight = each.getWeight();
    }
    // find the smallest bucket, ignoring empty buckets.
    for (Bucket hosted : each.getBuckets()) {
      if (smallestLoad == 0 || (hosted.getLoad() < smallestLoad && hosted.getBytes() > 0)) {
        smallestLoad = hosted.getLoad();
      }
    }
  }

  double varianceWithBucket =
      variance(getAverageLoad() * heaviestWeight + smallestLoad, heaviestWeight, getAverageLoad());
  double varianceWithoutBucket =
      variance(getAverageLoad() * heaviestWeight, heaviestWeight, getAverageLoad());
  this.minImprovement = (varianceWithBucket - varianceWithoutBucket) / smallestLoad;
  return this.minImprovement;
}
/**
 * Invalidates all four cached statistics by setting them back to -1, the value the getters treat
 * as "not computed yet".
 */
private void resetAverages() {
  // cached averages
  this.averageLoad = -1;
  this.primaryAverage = -1;
  // cached minimum improvements
  this.minImprovement = -1;
  this.minPrimaryImprovement = -1;
}
/**
 * Calculate how much the variance in load will decrease for a given move, per unit of bucket
 * size.
 *
 * @param sLoad the current load on the source member
 * @param sWeight the weight of the source member
 * @param tLoad the current load on the target member
 * @param tWeight the weight of the target member
 * @param bucketSize the size of the bucket we're considering moving
 * @param average the target weighted load for all members.
 * @return (variance_before - variance_after) summed over source and target and divided by
 *         bucketSize, so a positive value means the variance is decreasing.
 */
private double improvement(float sLoad, float sWeight, float tLoad, float tWeight,
    float bucketSize, float average) {
  double sourceBefore = variance(sLoad, sWeight, average);
  double sourceAfter = variance(sLoad - bucketSize, sWeight, average);
  double targetBefore = variance(tLoad, tWeight, average);
  double targetAfter = variance(tLoad + bucketSize, tWeight, average);

  double varianceDrop = sourceBefore - sourceAfter + targetBefore - targetAfter;
  return varianceDrop / bucketSize;
}
/**
 * Squared distance between a member's weighted load and the average.
 *
 * @param load the load on the member
 * @param weight the weight of the member
 * @param average the average weighted load
 * @return {@code (load / weight - average)} squared
 */
private double variance(double load, double weight, double average) {
  double weightedLoad = load / weight;
  double offset = weightedLoad - average;
  return offset * offset;
}
/**
 * Searches every (source member, hosted bucket, target member) combination for the bucket move
 * that reduces the variance of the weighted load the most.
 *
 * Targets that already host the bucket or refuse it are skipped. A move only qualifies when its
 * improvement beats both the best one seen so far and the minimum improvement, and when it has
 * not been attempted before.
 *
 * @return the best bucket move, or {@code null} if none qualifies
 */
public Move findBestBucketMove() {
  double largestGain = 0;
  Move winner = null;
  for (Member from : this.members.values()) {
    for (Bucket hosted : from.getBuckets()) {
      for (Member to : this.members.values()) {
        boolean alreadyHosting = hosted.getMembersHosting().contains(to);
        if (alreadyHosting || !to.willAcceptBucket(hosted, from, true).willAccept()) {
          continue;
        }
        double gain = improvement(from.getTotalLoad(), from.getWeight(), to.getTotalLoad(),
            to.getWeight(), hosted.getLoad(), getAverageLoad());
        if (gain > largestGain) {
          if (gain > getMinImprovement()) {
            Move candidate = new Move(from, to, hosted);
            if (!this.attemptedBucketMoves.contains(candidate)) {
              largestGain = gain;
              winner = candidate;
            }
          }
        }
      }
    }
  }
  return winner;
}
/**
 * Asks the bucket operator to move a bucket and, on success, mirrors the move in the model: the
 * target starts hosting the bucket, takes over the primary if the source held it, and the source
 * stops hosting the bucket. The move is remembered as attempted whether or not it succeeded;
 * attempting the same move twice trips an assertion.
 *
 * @param bestMove the bucket move to perform; its bucket must be a BucketRollup
 * @return true if the operator reported that the bucket was moved
 */
protected boolean moveBucket(Move bestMove) {
  Member from = bestMove.getSource();
  Member to = bestMove.getTarget();
  BucketRollup bucket = (BucketRollup) bestMove.getBucket();
  Map<String, Long> sizesByRegion = getColocatedRegionSizes(bucket);

  boolean moved = this.operator.moveBucket(from.getDistributedMember(),
      to.getDistributedMember(), bucket.getId(), sizesByRegion);
  if (moved) {
    bucket.addMember(to);
    boolean sourceWasPrimary = from.equals(bucket.getPrimary());
    if (sourceWasPrimary) {
      bucket.setPrimary(to, bucket.getPrimaryLoad());
    }
    bucket.removeMember(from);
  }

  boolean firstAttempt = this.attemptedBucketMoves.add(bestMove);
  Assert.assertTrue(firstAttempt,
      "PartitionedRegionLoadModel.moveBuckets - excluded set is not growing, so we probably would have an infinite loop here");
  return moved;
}
/**
 * Return a snapshot of what the partitioned member details look like for one region.
 *
 * @param region the name of the colocated region to report on
 * @return a new sorted set with one entry for each member of the model that hosts the region
 */
public Set<PartitionMemberInfo> getPartitionedMemberDetails(String region) {
  TreeSet<PartitionMemberInfo> snapshot = new TreeSet<PartitionMemberInfo>();
  for (MemberRollup rollup : this.members.values()) {
    Member regionMember = rollup.getColocatedMember(region);
    if (regionMember == null) {
      // this member does not host the requested region
      continue;
    }
    PartitionMemberInfo info = new PartitionMemberInfoImpl(regionMember.getDistributedMember(),
        regionMember.getConfiguredMaxMemory(), regionMember.getSize(),
        regionMember.getBucketCount(), regionMember.getPrimaryCount());
    snapshot.add(info);
  }
  return snapshot;
}
/**
 * For testing only, calculate the total variance of the members' total load around the average
 * load.
 *
 * @return the sum over all members of the squared distance of their weighted load from the
 *         average
 */
public float getVarianceForTest() {
  float total = 0;
  for (Member each : this.members.values()) {
    double memberVariance = variance(each.getTotalLoad(), each.getWeight(), getAverageLoad());
    total += memberVariance;
  }
  return total;
}
/**
 * For testing only, calculate the total variance of the members' primary load around the primary
 * average.
 *
 * @return the sum over all members of the squared distance of their weighted primary load from
 *         the primary average
 */
public float getPrimaryVarianceForTest() {
  float total = 0;
  for (Member each : this.members.values()) {
    double memberVariance =
        variance(each.getPrimaryLoad(), each.getWeight(), getPrimaryAverage());
    total += memberVariance;
  }
  return total;
}
/**
 * Blocks until the bucket operator has finished whatever asynchronous operations are still
 * pending; this simply delegates to the operator.
 */
public void waitForOperations() {
  this.operator.waitForOperations();
}
/**
 * Renders the model as a text table for diagnostics. The first row is a header; then there is
 * one row per member showing its primary load, size and configured maximum memory (both in MB),
 * followed by one column per bucket: 'P' if the member holds the primary, 'R' if it only hosts
 * the bucket, 'X' otherwise. The last row shows the number of offline members of each bucket.
 *
 * Every row uses the same column widths so that the bucket columns line up.
 *
 * @return the table, or an empty string if the model has no members
 */
@Override
public String toString() {
  if (this.members.isEmpty()) {
    return "";
  }
  // all buckets hosted by any member, sorted by bucket id
  TreeSet<Bucket> allBucketIds = new TreeSet<Bucket>(new Comparator<Bucket>() {
    @Override
    public int compare(Bucket o1, Bucket o2) {
      return Integer.compare(o1.getId(), o2.getId());
    }
  });
  // The id column is never narrower than its own header label. This also keeps the width
  // positive, because a width of 0 would produce the invalid format specifier "%0s".
  int longestMemberId = "MemberId".length();
  for (Member member : this.members.values()) {
    allBucketIds.addAll(member.getBuckets());
    int memberIdLength = member.getDistributedMember().toString().length();
    if (longestMemberId < memberIdLength) {
      longestMemberId = memberIdLength;
    }
  }
  String idColumn = "%" + longestMemberId + "s";
  // widths of the three numeric columns match the %9.0f %8.2f %8.2f used for the member rows
  String labelColumns = " %9s %8s %8s";
  String bucketColumn = "%4s";

  StringBuilder result = new StringBuilder();
  result.append(
      String.format(idColumn + labelColumns, "MemberId", "primaries", "size(MB)", "max(MB)"));
  for (Bucket bucket : allBucketIds) {
    result.append(String.format(bucketColumn, bucket.getId()));
  }
  for (Member member : this.members.values()) {
    result.append(String.format("\n" + idColumn + " %9.0f %8.2f %8.2f",
        member.getDistributedMember(), member.getPrimaryLoad(),
        member.getSize() / (float) MEGABYTES,
        member.getConfiguredMaxMemory() / (float) MEGABYTES));
    for (Bucket bucket : allBucketIds) {
      char symbol;
      if (member.getPrimaryBuckets().contains(bucket)) {
        symbol = 'P';
      } else if (member.getBuckets().contains(bucket)) {
        symbol = 'R';
      } else {
        symbol = 'X';
      }
      result.append(String.format(bucketColumn, symbol));
    }
  }
  // the offline row has no numeric values, so pad those columns with blanks
  result.append(String.format("\n" + idColumn + labelColumns, "#offline", "", "", ""));
  for (Bucket bucket : allBucketIds) {
    result.append(String.format(bucketColumn, bucket.getOfflineMembers().size()));
  }
  return result.toString();
}
/**
* Represents the sum of all of the colocated regions on a given member. Also, holds a map of all
* of the colocated regions hosted on this member.
*/
private class MemberRollup extends Member {
/** The per-region members that make up this rollup, keyed by region name. */
private final Map<String, Member> colocatedMembers = new HashMap<String, Member>();
/** Returned by isInvalid(); it is a constant false and is never assigned anywhere else. */
private final boolean invalid = false;
/**
* Creates an empty rollup for a member; all three arguments are handed to the Member
* constructor unchanged.
*
* @param memberId the id of the member this rollup represents
* @param isCritical whether the member is flagged as critical
* @param enforceLocalMaxMemory forwarded to the Member constructor
*/
public MemberRollup(InternalDistributedMember memberId, boolean isCritical,
boolean enforceLocalMaxMemory) {
super(memberId, isCritical, enforceLocalMaxMemory);
}
/**
 * Indicates that this member doesn't have all of the colocated regions. The backing field is a
 * final constant, so this currently always returns {@code false}.
 */
public boolean isInvalid() {
  return this.invalid;
}
/**
 * Registers the member that represents this rollup in one colocated region and adds that
 * member's weight and local max memory to the rollup's totals. A region that is already
 * registered is left untouched.
 *
 * @param region the name of the colocated region
 * @param member the per-region member to register
 * @return true if the member was registered, false if the region was already present
 */
public boolean addColocatedMember(String region, Member member) {
  if (getColocatedMembers().containsKey(region)) {
    return false;
  }
  getColocatedMembers().put(region, member);
  this.weight += member.weight;
  this.localMaxMemory += member.localMaxMemory;
  return true;
}
/**
 * @param region the name of a colocated region
 * @return the per-region member registered for that region, or {@code null} if there is none
 */
public Member getColocatedMember(String region) {
  Member regionMember = getColocatedMembers().get(region);
  return regionMember;
}
/**
 * Update the load on this member rollup with a change in size of one of the bucket rollups
 * hosted by this member. Each argument is a delta that is added to the matching running total.
 *
 * @param load the change in total load
 * @param primaryLoad the change in total primary load
 * @param bytes the change in total bytes
 */
public void updateLoad(float load, float primaryLoad, float bytes) {
  this.totalBytes += bytes;
  this.totalPrimaryLoad += primaryLoad;
  this.totalLoad += load;
}
/**
 * Adds a bucket rollup to this member rollup and, if the superclass accepted it, adds each
 * colocated bucket of the rollup to the matching per-region member.
 *
 * @param bucket the bucket to add; must be a BucketRollup
 * @return the result of the superclass call
 */
@Override
public boolean addBucket(Bucket bucket) {
  if (!super.addBucket(bucket)) {
    return false;
  }
  BucketRollup rollup = (BucketRollup) bucket;
  for (Map.Entry<String, Member> colocated : getColocatedMembers().entrySet()) {
    Member regionMember = colocated.getValue();
    Bucket regionBucket = rollup.getColocatedBuckets().get(colocated.getKey());
    if (regionBucket == null) {
      // this region has no bucket in the rollup
      continue;
    }
    regionMember.addBucket(regionBucket);
  }
  return true;
}
@Override
public boolean removeBucket(Bucket bucket) {
if (super.removeBucket(bucket)) {
BucketRollup bucketRollup = (BucketRollup) bucket;
for (Map.Entry<String, Member> entry : getColocatedMembers().entrySet()) {
String region = entry.getKey();
Member member = entry.getValue();
Bucket colocatedBucket = bucketRollup.getColocatedBuckets().get(region);
if (colocatedBucket != null) {
member.removeBucket(colocatedBucket);
}
}
return true;
}
return false;
}
@Override
public boolean addPrimary(Bucket bucket) {
if (super.addPrimary(bucket)) {
BucketRollup bucketRollup = (BucketRollup) bucket;
for (Map.Entry<String, Member> entry : getColocatedMembers().entrySet()) {
String region = entry.getKey();
Member member = entry.getValue();
Bucket colocatedBucket = bucketRollup.getColocatedBuckets().get(region);
if (colocatedBucket != null) {
member.addPrimary(colocatedBucket);
}
}
return true;
}
return false;
}
@Override
public boolean removePrimary(Bucket bucket) {
if (super.removePrimary(bucket)) {
BucketRollup bucketRollup = (BucketRollup) bucket;
for (Map.Entry<String, Member> entry : getColocatedMembers().entrySet()) {
String region = entry.getKey();
Member member = entry.getValue();
Bucket colocatedBucket = bucketRollup.getColocatedBuckets().get(region);
if (colocatedBucket != null) {
member.removePrimary(colocatedBucket);
}
}
return true;
}
return false;
}
@Override
public RefusalReason willAcceptBucket(Bucket bucket, Member source, boolean checkIPAddress) {
RefusalReason reason = super.willAcceptBucket(bucket, source, checkIPAddress);
if (reason.willAccept()) {
BucketRollup bucketRollup = (BucketRollup) bucket;
MemberRollup sourceRollup = (MemberRollup) source;
for (Map.Entry<String, Member> entry : getColocatedMembers().entrySet()) {
String region = entry.getKey();
Member member = entry.getValue();
Bucket colocatedBucket = bucketRollup.getColocatedBuckets().get(region);
Member colocatedSource =
sourceRollup == null ? null : sourceRollup.getColocatedMembers().get(region);
if (colocatedBucket != null) {
reason = member.willAcceptBucket(colocatedBucket, colocatedSource, checkIPAddress);
if (!reason.willAccept()) {
return reason;
}
}
}
return RefusalReason.NONE;
}
return reason;
}
Map<String, Member> getColocatedMembers() {
return this.colocatedMembers;
}
}
/**
 * Represents the sum of all of the colocated buckets with a given bucket id. Membership and
 * primary changes applied to the rollup are mirrored onto the colocated bucket of every region
 * for which the member rollup has a colocated member.
 */
protected class BucketRollup extends Bucket {
  /** Per-region buckets that make up this rollup, keyed by region name. */
  private final Map<String, Bucket> colocatedBuckets = new HashMap<String, Bucket>();

  public BucketRollup(int id) {
    super(id);
  }

  /**
   * Registers the bucket of one colocated region, folds its load, primary load, bytes and offline
   * members into this rollup, and bumps the totals of every member already hosting the rollup.
   *
   * @param region name of the colocated region
   * @param b the bucket of that region
   * @return true if the bucket was added, false if the region already had a bucket registered
   */
  public boolean addColocatedBucket(String region, Bucket b) {
    if (this.getColocatedBuckets().containsKey(region)) {
      return false;
    }
    this.getColocatedBuckets().put(region, b);
    this.load += b.getLoad();
    this.primaryLoad += b.getPrimaryLoad();
    this.bytes += b.getBytes();
    this.offlineMembers.addAll(b.getOfflineMembers());

    // The rollup just grew, so every member that hosts it has to account for the extra size.
    // Only the current primary is charged the additional primary load.
    for (Member host : getMembersHosting()) {
      MemberRollup hostRollup = (MemberRollup) host;
      float addedPrimaryLoad = (this.getPrimary() == host) ? b.getPrimaryLoad() : 0;
      hostRollup.updateLoad(b.getLoad(), addedPrimaryLoad, b.getBytes());
    }
    return true;
  }

  /**
   * Adds the member rollup as a host and, when that succeeds, adds each of its colocated members
   * as a host of the colocated bucket of the same region.
   */
  @Override
  public boolean addMember(Member targetMember) {
    if (!super.addMember(targetMember)) {
      return false;
    }
    Map<String, Member> membersByRegion = ((MemberRollup) targetMember).getColocatedMembers();
    for (Map.Entry<String, Bucket> regionEntry : getColocatedBuckets().entrySet()) {
      Member regionMember = membersByRegion.get(regionEntry.getKey());
      if (regionMember != null) {
        regionEntry.getValue().addMember(regionMember);
      }
    }
    return true;
  }

  /**
   * Removes the member rollup as a host and, when that succeeds, removes each of its colocated
   * members from the colocated bucket of the same region.
   */
  @Override
  public boolean removeMember(Member targetMember) {
    if (!super.removeMember(targetMember)) {
      return false;
    }
    Map<String, Member> membersByRegion = ((MemberRollup) targetMember).getColocatedMembers();
    for (Map.Entry<String, Bucket> regionEntry : getColocatedBuckets().entrySet()) {
      Member regionMember = membersByRegion.get(regionEntry.getKey());
      if (regionMember != null) {
        regionEntry.getValue().removeMember(regionMember);
      }
    }
    return true;
  }

  /**
   * Sets the primary of the rollup and, for a non-null target, sets the matching colocated member
   * as primary of each colocated bucket with the same primary load. A null target is applied to
   * the rollup only; this method does not forward it to the colocated buckets.
   */
  @Override
  public void setPrimary(Member targetMember, float primaryLoad) {
    super.setPrimary(targetMember, primaryLoad);
    if (targetMember == null) {
      return;
    }
    Map<String, Member> membersByRegion = ((MemberRollup) targetMember).getColocatedMembers();
    for (Map.Entry<String, Bucket> regionEntry : getColocatedBuckets().entrySet()) {
      Member regionMember = membersByRegion.get(regionEntry.getKey());
      if (regionMember != null) {
        regionEntry.getValue().setPrimary(regionMember, primaryLoad);
      }
    }
  }

  Map<String, Bucket> getColocatedBuckets() {
    return this.colocatedBuckets;
  }
}
/**
 * Represents a single member of the distributed system.
 *
 * <p>
 * A member keeps the set of buckets it hosts, the set of buckets it is primary for, and running
 * totals of load, primary load and bytes. Equality, hashing and ordering are all based on the
 * member id.
 */
protected class Member implements Comparable<Member> {
  private final InternalDistributedMember memberId;
  protected float weight;
  protected float totalLoad;
  protected float totalPrimaryLoad;
  protected long totalBytes;
  protected long localMaxMemory;
  private final Set<Bucket> buckets = new TreeSet<Bucket>();
  private final Set<Bucket> primaryBuckets = new TreeSet<Bucket>();
  private final boolean isCritical;
  private final boolean enforceLocalMaxMemory;

  public Member(InternalDistributedMember memberId, boolean isCritical,
      boolean enforceLocalMaxMemory) {
    this.memberId = memberId;
    this.isCritical = isCritical;
    this.enforceLocalMaxMemory = enforceLocalMaxMemory;
  }

  public Member(InternalDistributedMember memberId, float weight, long localMaxMemory,
      boolean isCritical, boolean enforceLocalMaxMemory) {
    this(memberId, isCritical, enforceLocalMaxMemory);
    this.weight = weight;
    this.localMaxMemory = localMaxMemory;
  }

  /**
   * Decides whether this member can take on the given bucket. The checks run in this order:
   * already hosting, redundancy zone (only when {@code checkZone} is set), local max memory (only
   * when enforced for this member), critical heap.
   *
   * @param bucket the bucket that would be placed on this member
   * @param sourceMember the member we will be moving this bucket off of, may be null
   * @param checkZone true if we should not put two copies of a bucket on two nodes that the
   *        address comparor considers to be in the same zone
   * @return the first reason for refusing, or {@link RefusalReason#NONE} if the bucket is accepted
   */
  public RefusalReason willAcceptBucket(Bucket bucket, Member sourceMember, boolean checkZone) {
    // A member never hosts two copies of the same bucket.
    if (getBuckets().contains(bucket)) {
      return RefusalReason.ALREADY_HOSTING;
    }

    if (checkZone) {
      // If the source member is equivalent to the target member, go
      // ahead and allow the bucket move (it's not making our redundancy worse).
      // TODO we could have some logic to prefer moving to different ip addresses
      // Probably that logic should be another stage after redundancy recovery, like
      // improveRedundancy.
      boolean sourceIsEquivalent = false;
      if (sourceMember != null) {
        sourceIsEquivalent =
            addressComparor.areSameZone(getMemberId(), sourceMember.getDistributedMember());
      }
      if (!sourceIsEquivalent) {
        for (Member host : bucket.getMembersHosting()) {
          // The source itself only counts against us when unique zones are enforced.
          boolean hostCounts = !host.equals(sourceMember) || addressComparor.enforceUniqueZones();
          if (hostCounts
              && addressComparor.areSameZone(getMemberId(), host.getDistributedMember())) {
            if (logger.isDebugEnabled()) {
              logger.debug(
                  "Member {} would prefer not to host {} because it is already on another member with the same redundancy zone",
                  this, bucket);
            }
            return RefusalReason.SAME_ZONE;
          }
        }
      }
    }

    // The bucket has to fit within the configured local max memory, when that limit is enforced.
    if (this.enforceLocalMaxMemory && this.totalBytes + bucket.getBytes() > this.localMaxMemory) {
      if (logger.isDebugEnabled()) {
        logger.debug("Member {} won't host bucket {} because it doesn't have enough space", this,
            bucket);
      }
      return RefusalReason.LOCAL_MAX_MEMORY_FULL;
    }

    // A member flagged as critical takes no additional buckets.
    if (isCritical) {
      if (logger.isDebugEnabled()) {
        logger.debug("Member {} won't host bucket {} because it's heap is critical", this,
            bucket);
      }
      return RefusalReason.CRITICAL_HEAP;
    }

    return RefusalReason.NONE;
  }

  /**
   * Starts hosting the bucket: registers this member with the bucket and adds the bucket's bytes
   * and load to the totals.
   *
   * @return true if the bucket was added, false if it was already hosted here
   */
  public boolean addBucket(Bucket bucket) {
    if (!getBuckets().add(bucket)) {
      return false;
    }
    bucket.addMember(this);
    this.totalBytes += bucket.getBytes();
    this.totalLoad += bucket.getLoad();
    return true;
  }

  /**
   * Stops hosting the bucket: unregisters this member from the bucket and subtracts the bucket's
   * bytes and load from the totals.
   *
   * @return true if the bucket was removed, false if it was not hosted here
   */
  public boolean removeBucket(Bucket bucket) {
    if (!getBuckets().remove(bucket)) {
      return false;
    }
    bucket.removeMember(this);
    this.totalBytes -= bucket.getBytes();
    this.totalLoad -= bucket.getLoad();
    return true;
  }

  /**
   * @return true if the bucket was in the primary set and its primary load was subtracted
   */
  public boolean removePrimary(Bucket bucket) {
    if (!getPrimaryBuckets().remove(bucket)) {
      return false;
    }
    this.totalPrimaryLoad -= bucket.getPrimaryLoad();
    return true;
  }

  /**
   * @return true if the bucket was newly added to the primary set and its primary load was added
   */
  public boolean addPrimary(Bucket bucket) {
    if (!getPrimaryBuckets().add(bucket)) {
      return false;
    }
    this.totalPrimaryLoad += bucket.getPrimaryLoad();
    return true;
  }

  public int getBucketCount() {
    return getBuckets().size();
  }

  public long getConfiguredMaxMemory() {
    return this.localMaxMemory;
  }

  public InternalDistributedMember getDistributedMember() {
    return getMemberId();
  }

  /**
   * @return the number of hosted buckets whose primary equals this member
   */
  public int getPrimaryCount() {
    int count = 0;
    for (Bucket hosted : getBuckets()) {
      if (this.equals(hosted.primary)) {
        count++;
      }
    }
    return count;
  }

  public long getSize() {
    return this.totalBytes;
  }

  public float getTotalLoad() {
    return this.totalLoad;
  }

  public float getWeight() {
    return this.weight;
  }

  @Override
  public String toString() {
    return "Member(id=" + getMemberId() + ")";
  }

  public float getPrimaryLoad() {
    return this.totalPrimaryLoad;
  }

  protected Set<Bucket> getBuckets() {
    return this.buckets;
  }

  private InternalDistributedMember getMemberId() {
    return this.memberId;
  }

  private Set<Bucket> getPrimaryBuckets() {
    return this.primaryBuckets;
  }

  @Override
  public int hashCode() {
    return memberId.hashCode();
  }

  /** Two members are equal when their member ids are equal. */
  @Override
  public boolean equals(Object other) {
    return other instanceof Member && this.memberId.equals(((Member) other).memberId);
  }

  /** Orders members by their member id. */
  @Override
  public int compareTo(Member other) {
    // memberId is InternalDistributedMember which implements Comparable
    return this.memberId.compareTo(other.memberId);
  }
}
/**
 * Represents a single bucket.
 *
 * <p>
 * A bucket keeps the set of members hosting it, its primary, and its load, primary load and size
 * in bytes. The online redundancy counter starts at -1 and moves by one for every member added or
 * removed. Equality, hashing and ordering are all based on the bucket id.
 */
protected class Bucket implements Comparable<Bucket> {
  protected long bytes;
  private final int id;
  protected float load;
  protected float primaryLoad;
  private int redundancy = -1;
  private final Set<Member> membersHosting = new TreeSet<Member>();
  private Member primary;
  protected Set<PersistentMemberID> offlineMembers = new HashSet<PersistentMemberID>();

  public Bucket(int id) {
    this.id = id;
  }

  /**
   * @param id the bucket id
   * @param load the load of this bucket
   * @param bytes the size of this bucket
   * @param offlineMembers the offline members of this bucket; the set is kept by reference, not
   *        copied
   */
  public Bucket(int id, float load, long bytes, Set<PersistentMemberID> offlineMembers) {
    this(id);
    this.load = load;
    this.bytes = bytes;
    this.offlineMembers = offlineMembers;
  }

  /**
   * Replaces the primary of this bucket. Nothing happens if the current primary is
   * INVALID_MEMBER. Otherwise the previous primary (if any) drops this bucket from its primaries,
   * and a new primary that is neither null nor INVALID_MEMBER is added as a host and records this
   * bucket as one of its primaries.
   *
   * @param member the new primary, may be null
   * @param primaryLoad the primary load to store on this bucket
   */
  public void setPrimary(Member member, float primaryLoad) {
    if (this.primary == INVALID_MEMBER) {
      return;
    }
    Member previous = this.primary;
    if (previous != null) {
      previous.removePrimary(this);
    }
    this.primary = member;
    this.primaryLoad = primaryLoad;

    boolean hasRealPrimary = member != null && member != INVALID_MEMBER;
    if (hasRealPrimary) {
      addMember(member);
      member.addPrimary(this);
    }
  }

  /**
   * Adds a hosting member, raises the online redundancy and registers this bucket with the
   * member.
   *
   * @param targetMember the member that now hosts this bucket
   * @return true if the member was added, false if it was already hosting this bucket
   */
  public boolean addMember(Member targetMember) {
    if (!this.getMembersHosting().add(targetMember)) {
      return false;
    }
    this.redundancy++;
    targetMember.addBucket(this);
    return true;
  }

  /**
   * Removes a hosting member, clears the primary if that member was the primary, lowers the
   * online redundancy and unregisters this bucket from the member.
   *
   * @param targetMember the member that no longer hosts this bucket
   * @return true if the member was removed, false if it was not hosting this bucket
   */
  public boolean removeMember(Member targetMember) {
    if (!this.getMembersHosting().remove(targetMember)) {
      return false;
    }
    // The primary has to be cleared before the member is detached.
    if (targetMember == this.primary) {
      setPrimary(null, 0);
    }
    this.redundancy--;
    targetMember.removeBucket(this);
    return true;
  }

  /**
   * @return the online redundancy plus the number of offline members
   */
  public int getRedundancy() {
    return this.redundancy + offlineMembers.size();
  }

  public int getOnlineRedundancy() {
    return this.redundancy;
  }

  public float getLoad() {
    return this.load;
  }

  public int getId() {
    return this.id;
  }

  public long getBytes() {
    return this.bytes;
  }

  @Override
  public String toString() {
    return "Bucket(id=" + getId() + ",load=" + load + ")";
  }

  public float getPrimaryLoad() {
    return this.primaryLoad;
  }

  public Set<Member> getMembersHosting() {
    return this.membersHosting;
  }

  public Member getPrimary() {
    return this.primary;
  }

  public Collection<? extends PersistentMemberID> getOfflineMembers() {
    return offlineMembers;
  }

  @Override
  public int hashCode() {
    return this.id;
  }

  /** Two buckets are equal when their ids are equal. */
  @Override
  public boolean equals(Object other) {
    return other instanceof Bucket && this.id == ((Bucket) other).id;
  }

  /** Orders buckets by ascending id. */
  @Override
  public int compareTo(Bucket other) {
    return Integer.compare(this.id, other.id);
  }
}
/**
 * Represents a move from one node to another. Used to keep track of moves that we have already
 * attempted that have failed.
 *
 * <p>
 * Two moves are equal when their bucket, source and target are pairwise equal; any of the three
 * may be null.
 */
protected static class Move {
  private final Member source;
  private final Member target;
  private final Bucket bucket;

  public Move(Member source, Member target, Bucket bucket) {
    this.source = source;
    this.target = target;
    this.bucket = bucket;
  }

  /**
   * @return the source
   */
  public Member getSource() {
    return this.source;
  }

  /**
   * @return the target
   */
  public Member getTarget() {
    return this.target;
  }

  /**
   * @return the bucket
   */
  public Bucket getBucket() {
    return this.bucket;
  }

  /** Combines the hash codes of bucket, source and target, in that order, using 0 for null. */
  @Override
  public int hashCode() {
    int hash = 31 + (this.bucket == null ? 0 : this.bucket.hashCode());
    hash = 31 * hash + (this.source == null ? 0 : this.source.hashCode());
    hash = 31 * hash + (this.target == null ? 0 : this.target.hashCode());
    return hash;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    Move that = (Move) obj;
    return sameOrBothNull(this.bucket, that.bucket)
        && sameOrBothNull(this.source, that.source)
        && sameOrBothNull(this.target, that.target);
  }

  /** Null-safe equality: a null value only matches another null. */
  private static boolean sameOrBothNull(Object mine, Object theirs) {
    return mine == null ? theirs == null : mine.equals(theirs);
  }
}
/**
 * Strategy used by the model to decide whether two members belong to the same redundancy zone.
 */
public interface AddressComparor {
  /**
   * Consulted by {@code Member.willAcceptBucket}: when this returns true, a member that already
   * hosts the bucket counts as a zone conflict even if it is the source of the move.
   */
  boolean enforceUniqueZones();

  /**
   * Return true if the two addresses are equivalent
   */
  boolean areSameZone(InternalDistributedMember member1, InternalDistributedMember member2);
}
/**
 * The reasons a member can give for refusing to host a bucket. {@link #NONE} means the member
 * accepts the bucket.
 */
public static enum RefusalReason {
  NONE, ALREADY_HOSTING, UNITIALIZED_MEMBER, SAME_ZONE, LOCAL_MAX_MEMORY_FULL, CRITICAL_HEAP;

  /**
   * @return true only for {@link #NONE}
   */
  public boolean willAccept() {
    return this == NONE;
  }

  /**
   * Builds a human readable explanation of this refusal.
   *
   * @param source the member the bucket would be moved from; not used in any of the messages
   * @param target the member that was asked to host the bucket; dereferenced for every reason
   *        except {@link #NONE}
   * @param bucket the bucket that was offered; dereferenced for {@link #ALREADY_HOSTING},
   *        {@link #SAME_ZONE} and {@link #LOCAL_MAX_MEMORY_FULL}
   * @return the message describing why the move was refused
   */
  public String formatMessage(Member source, Member target, Bucket bucket) {
    switch (this) {
      case NONE:
        return "No reason, the move should be allowed.";
      case ALREADY_HOSTING:
        return "Target member " + target.getMemberId() + " is already hosting bucket "
            + bucket.getId();
      case UNITIALIZED_MEMBER:
        return "Target member " + target.getMemberId() + " is not fully initialized";
      case SAME_ZONE:
        return "Target member " + target.getMemberId()
            + " is in the same redundancy zone as other members hosting bucket " + bucket.getId()
            + ": " + bucket.getMembersHosting();
      case LOCAL_MAX_MEMORY_FULL:
        // "remaining" is the space still free on the target: its local max memory minus the
        // bytes it already hosts. Reporting the hosted bytes here would mislabel used space.
        return "Target member " + target.getMemberId()
            + " does not have space within it's local max memory for bucket " + bucket.getId()
            + ". Bucket Size " + bucket.getBytes() + " local max memory: " + target.localMaxMemory
            + " remaining: " + (target.localMaxMemory - target.totalBytes);
      case CRITICAL_HEAP:
        return "Target member " + target.getMemberId()
            + " has reached its critical heap percentage, and cannot accept more data";
      default:
        return this.toString();
    }
  }
}
}