/*
* Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.internal.partition.impl;
import com.hazelcast.cluster.ClusterState;
import com.hazelcast.core.HazelcastInstanceNotActiveException;
import com.hazelcast.core.MemberLeftException;
import com.hazelcast.core.MigrationEvent;
import com.hazelcast.instance.MemberImpl;
import com.hazelcast.instance.Node;
import com.hazelcast.internal.cluster.Versions;
import com.hazelcast.internal.metrics.Probe;
import com.hazelcast.internal.partition.InternalPartition;
import com.hazelcast.internal.partition.InternalPartitionService;
import com.hazelcast.internal.partition.MigrationInfo;
import com.hazelcast.internal.partition.MigrationInfo.MigrationStatus;
import com.hazelcast.internal.partition.PartitionRuntimeState;
import com.hazelcast.internal.partition.PartitionStateVersionMismatchException;
import com.hazelcast.internal.partition.impl.InternalMigrationListener.MigrationParticipant;
import com.hazelcast.internal.partition.impl.MigrationPlanner.MigrationDecisionCallback;
import com.hazelcast.internal.partition.operation.FinalizeMigrationOperation;
import com.hazelcast.internal.partition.operation.LegacyMigrationRequestOperation;
import com.hazelcast.internal.partition.operation.MigrationCommitOperation;
import com.hazelcast.internal.partition.operation.MigrationRequestOperation;
import com.hazelcast.internal.partition.operation.PartitionStateOperation;
import com.hazelcast.internal.partition.operation.PromotionCommitOperation;
import com.hazelcast.internal.partition.operation.ReplicaSyncRequest;
import com.hazelcast.internal.partition.operation.ShutdownResponseOperation;
import com.hazelcast.logging.ILogger;
import com.hazelcast.nio.Address;
import com.hazelcast.spi.ExecutionService;
import com.hazelcast.spi.Operation;
import com.hazelcast.spi.exception.TargetNotMemberException;
import com.hazelcast.spi.impl.NodeEngineImpl;
import com.hazelcast.spi.partition.IPartitionLostEvent;
import com.hazelcast.spi.partition.MigrationEndpoint;
import com.hazelcast.spi.properties.GroupProperty;
import com.hazelcast.spi.properties.HazelcastProperties;
import com.hazelcast.util.Clock;
import com.hazelcast.util.MutableInteger;
import com.hazelcast.util.Preconditions;
import com.hazelcast.util.scheduler.CoalescingDelayedTrigger;
import com.hazelcast.version.Version;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.logging.Level;
import static com.hazelcast.cluster.memberselector.MemberSelectors.DATA_MEMBER_SELECTOR;
import static com.hazelcast.spi.partition.IPartitionService.SERVICE_NAME;
/**
* Maintains migration system state and manages migration operations performed within the cluster.
*/
@SuppressWarnings({"checkstyle:classdataabstractioncoupling", "checkstyle:methodcount"})
public class MigrationManager {
private static final boolean ASSERTION_ENABLED = MigrationManager.class.desiredAssertionStatus();
private static final int PARTITION_STATE_VERSION_INCREMENT_DELTA_ON_MIGRATION_FAILURE = 2;
private static final int MIGRATION_PAUSE_DURATION_SECONDS_ON_MIGRATION_FAILURE = 3;
private static final String INVALID_UUID = "<invalid-uuid>";
final long partitionMigrationInterval;
private final Node node;
private final NodeEngineImpl nodeEngine;
private final InternalPartitionServiceImpl partitionService;
private final ILogger logger;
private final PartitionStateManager partitionStateManager;
private final MigrationQueue migrationQueue = new MigrationQueue();
private final MigrationThread migrationThread;
private final AtomicBoolean migrationAllowed = new AtomicBoolean(true);
@Probe(name = "lastRepartitionTime")
private final AtomicLong lastRepartitionTime = new AtomicLong();
private final long partitionMigrationTimeout;
private final CoalescingDelayedTrigger delayedResumeMigrationTrigger;
private final Set<Address> shutdownRequestedAddresses = new HashSet<Address>();
// updates will be done under lock, but reads will be multithreaded.
private volatile MigrationInfo activeMigrationInfo;
// both reads and updates will be done under lock!
private final LinkedHashSet<MigrationInfo> completedMigrations = new LinkedHashSet<MigrationInfo>();
@Probe
private final AtomicLong completedMigrationCounter = new AtomicLong();
private volatile InternalMigrationListener internalMigrationListener
= new InternalMigrationListener.NopInternalMigrationListener();
private final Lock partitionServiceLock;
private final MigrationPlanner migrationPlanner;
private final boolean fragmentedMigrationEnabled;
/**
 * Constructs the migration manager.
 *
 * @param node the local node
 * @param service the partition service which owns this manager
 * @param partitionServiceLock lock shared with the partition service, guarding migration state
 */
MigrationManager(Node node, InternalPartitionServiceImpl service, Lock partitionServiceLock) {
this.node = node;
this.nodeEngine = node.nodeEngine;
this.partitionService = service;
this.logger = node.getLogger(getClass());
this.partitionServiceLock = partitionServiceLock;
migrationPlanner = new MigrationPlanner(node.getLogger(MigrationPlanner.class));
HazelcastProperties properties = node.getProperties();
long intervalMillis = properties.getMillis(GroupProperty.PARTITION_MIGRATION_INTERVAL);
// negative configured intervals are clamped to 0 (no pause between migrations)
partitionMigrationInterval = (intervalMillis > 0 ? intervalMillis : 0);
partitionMigrationTimeout = properties.getMillis(GroupProperty.PARTITION_MIGRATION_TIMEOUT);
fragmentedMigrationEnabled = properties.getBoolean(GroupProperty.PARTITION_FRAGMENTED_MIGRATION_ENABLED);
partitionStateManager = partitionService.getPartitionStateManager();
ILogger migrationThreadLogger = node.getLogger(MigrationThread.class);
String hzName = nodeEngine.getHazelcastInstance().getName();
migrationThread = new MigrationThread(this, hzName, migrationThreadLogger, migrationQueue);
long migrationPauseDelayMs = TimeUnit.SECONDS.toMillis(MIGRATION_PAUSE_DURATION_SECONDS_ON_MIGRATION_FAILURE);
ExecutionService executionService = nodeEngine.getExecutionService();
// coalesces repeated pause requests; resumes migration after the (possibly extended) delay elapses
delayedResumeMigrationTrigger = new CoalescingDelayedTrigger(
executionService, migrationPauseDelayMs, 2 * migrationPauseDelayMs, new Runnable() {
@Override
public void run() {
resumeMigration();
}
});
}
/** Metrics probe: reports 1 while migration is allowed, 0 while it is paused. */
@Probe(name = "migrationActive")
private int migrationActiveProbe() {
    if (migrationAllowed.get()) {
        return 1;
    }
    return 0;
}
/** Pauses all migration tasks until {@link #resumeMigration()} is called. */
void pauseMigration() {
migrationAllowed.set(false);
}
/** Re-allows migration tasks after a {@link #pauseMigration()}. */
void resumeMigration() {
migrationAllowed.set(true);
}
/** Resumes migration after a coalesced delay; repeated calls extend the delay instead of stacking resumes. */
private void resumeMigrationEventually() {
delayedResumeMigrationTrigger.executeWithDelay();
}
/**
 * Checks if migration tasks are allowed. This can include partition state and partition data sync tasks.
 * The migration is not allowed during membership changes (member removed or joining) or for a shorter period when
 * a migration fails before restarting the migration process.
 *
 * @see MigrationRunnable
 * @see PublishPartitionRuntimeStateTask
 * @see PartitionStateOperation
 * @see ReplicaSyncRequest
 * @return {@code true} if migration tasks are currently allowed to run
 */
boolean isMigrationAllowed() {
return migrationAllowed.get();
}
/**
 * Finalizes a migration that has finished with {@link MigrationStatus#SUCCESS} or {@link MigrationStatus#FAILED}
 * by invoking {@link FinalizeMigrationOperation} locally if this is the source or destination and removes the active
 * migration. Clears the migration flag if this node is the partition owner of a backup migration.
 * Otherwise, the migration flag is cleared asynchronously within {@link FinalizeMigrationOperation}
 * <p>
 * This method should not be called on a node which is not the source, destination or partition owner for this migration.
 *
 * @param migrationInfo the migration to be finalized
 */
private void finalizeMigration(MigrationInfo migrationInfo) {
try {
Address thisAddress = node.getThisAddress();
int partitionId = migrationInfo.getPartitionId();
boolean source = thisAddress.equals(migrationInfo.getSource());
boolean destination = thisAddress.equals(migrationInfo.getDestination());
// only SUCCESS or FAILED migrations may be finalized; anything else is a caller bug
assert migrationInfo.getStatus() == MigrationStatus.SUCCESS
|| migrationInfo.getStatus() == MigrationStatus.FAILED : "Invalid migration: " + migrationInfo;
if (source || destination) {
boolean success = migrationInfo.getStatus() == MigrationStatus.SUCCESS;
MigrationParticipant participant = source ? MigrationParticipant.SOURCE : MigrationParticipant.DESTINATION;
if (success) {
internalMigrationListener.onMigrationCommit(participant, migrationInfo);
} else {
internalMigrationListener.onMigrationRollback(participant, migrationInfo);
}
MigrationEndpoint endpoint = source ? MigrationEndpoint.SOURCE : MigrationEndpoint.DESTINATION;
FinalizeMigrationOperation op = new FinalizeMigrationOperation(migrationInfo, endpoint, success);
// the operation executes locally, so target validation is disabled
op.setPartitionId(partitionId).setNodeEngine(nodeEngine).setValidateTarget(false)
.setService(partitionService);
nodeEngine.getOperationService().execute(op);
removeActiveMigration(partitionId);
} else {
final Address partitionOwner = partitionStateManager.getPartitionImpl(partitionId).getOwnerOrNull();
if (node.getThisAddress().equals(partitionOwner)) {
// this node owns the partition of a backup migration: clear the migrating flag synchronously
removeActiveMigration(partitionId);
partitionStateManager.clearMigratingFlag(partitionId);
} else {
logger.severe("Failed to finalize migration because this member " + thisAddress
+ " is not a participant of the migration: " + migrationInfo);
}
}
} catch (Exception e) {
logger.warning(e);
} finally {
// always release the processing flag, even on failure, so later finalizations are not blocked
migrationInfo.doneProcessing();
}
}
/**
 * Sets the active migration if none is currently set and returns {@code null}; otherwise leaves the
 * existing active migration in place and returns it. Acquires the partition service lock.
 *
 * @param migrationInfo the migration to set as active
 * @return {@code null} when the migration was set, otherwise the already-active migration
 */
public MigrationInfo setActiveMigration(MigrationInfo migrationInfo) {
    partitionServiceLock.lock();
    try {
        MigrationInfo current = activeMigrationInfo;
        if (current == null) {
            activeMigrationInfo = migrationInfo;
            return null;
        }
        if (!current.equals(migrationInfo) && logger.isFineEnabled()) {
            logger.fine("Active migration is not set: " + migrationInfo
                    + ". Existing active migration: " + current);
        }
        return current;
    } finally {
        partitionServiceLock.unlock();
    }
}
/** Returns the currently active migration, or {@code null} if none is in progress (volatile read, no lock). */
MigrationInfo getActiveMigration() {
return activeMigrationInfo;
}
/**
 * Removes the current {@link #activeMigrationInfo} if the {@code partitionId} is the same and returns {@code true} if
 * removed.
 * Acquires the partition service lock.
 */
private boolean removeActiveMigration(int partitionId) {
    partitionServiceLock.lock();
    try {
        MigrationInfo current = activeMigrationInfo;
        if (current == null) {
            return false;
        }
        if (current.getPartitionId() != partitionId) {
            if (logger.isFineEnabled()) {
                logger.fine("Active migration is not removed, because it has different partitionId! "
                        + "partitionId=" + partitionId + ", active migration=" + current);
            }
            return false;
        }
        activeMigrationInfo = null;
        return true;
    } finally {
        partitionServiceLock.unlock();
    }
}
/**
 * Finalizes the active migration if it is equal to the {@code migrationInfo} or if this node was a backup replica before
 * the migration (see {@link FinalizeMigrationOperation}).
 * Acquires the partition service lock.
 *
 * @param migrationInfo the migration whose finalization should be performed or scheduled
 */
void scheduleActiveMigrationFinalization(final MigrationInfo migrationInfo) {
partitionServiceLock.lock();
try {
// we use activeMigrationInfo because it contains migrated replica fragment namespaces
final MigrationInfo activeMigrationInfo = this.activeMigrationInfo;
if (activeMigrationInfo != null && migrationInfo.equals(activeMigrationInfo)) {
if (activeMigrationInfo.startProcessing()) {
activeMigrationInfo.setStatus(migrationInfo.getStatus());
finalizeMigration(activeMigrationInfo);
} else {
// the migration is being processed elsewhere right now; retry finalization after a short delay
logger.info("Scheduling finalization of " + migrationInfo
+ ", because migration process is currently running.");
nodeEngine.getExecutionService().schedule(new Runnable() {
@Override
public void run() {
scheduleActiveMigrationFinalization(activeMigrationInfo);
}
}, 3, TimeUnit.SECONDS);
}
return;
}
// this node held a backup replica before the migration (OLD BACKUP) and must finalize locally
if (migrationInfo.getSourceCurrentReplicaIndex() > 0
&& node.getThisAddress().equals(migrationInfo.getSource())) {
// OLD BACKUP
finalizeMigration(migrationInfo);
}
} finally {
partitionServiceLock.unlock();
}
}
/**
 * Sends a {@link MigrationCommitOperation} to the destination and returns {@code true} if the new partition state
 * was applied on the destination.
 *
 * @param destination the migration destination member address
 * @param migration the migration being committed; must not be {@code null}
 * @return {@code true} if the destination applied the commit (or the destination is this master node)
 */
private boolean commitMigrationToDestination(Address destination, MigrationInfo migration) {
assert migration != null : "No migrations to commit! destination=" + destination;
// destination is this (master) node: nothing to send, the commit is implicit
if (node.getThisAddress().equals(destination)) {
if (logger.isFinestEnabled()) {
logger.finest("Shortcutting migration commit, since destination is master. -> " + migration);
}
return true;
}
MemberImpl member = node.getClusterService().getMember(destination);
if (member == null) {
logger.warning("Destination " + destination + " is not member anymore");
return false;
}
try {
if (logger.isFinestEnabled()) {
logger.finest("Sending commit operation to " + destination + " for " + migration);
}
PartitionRuntimeState partitionState = partitionService.createMigrationCommitPartitionState(migration);
// the destination UUID guards against committing to a restarted member reusing the same address
String destinationUuid = member.getUuid();
MigrationCommitOperation operation = new MigrationCommitOperation(partitionState, destinationUuid);
// the commit must not be given up: retry indefinitely and never time out the call
Future<Boolean> future = nodeEngine.getOperationService()
.createInvocationBuilder(SERVICE_NAME, operation, destination)
.setTryCount(Integer.MAX_VALUE)
.setCallTimeout(Long.MAX_VALUE).invoke();
boolean result = future.get();
if (logger.isFinestEnabled()) {
logger.finest("Migration commit result " + result + " from " + destination + " for " + migration);
}
return result;
} catch (Throwable t) {
logMigrationCommitFailure(destination, migration, t);
}
return false;
}
/**
 * Logs a migration commit failure. Member-departure failures are logged at a lower level
 * (fine when this node itself is shutting down, warning when the destination left); any
 * other cause is logged as severe with the full stack trace.
 */
private void logMigrationCommitFailure(Address destination, MigrationInfo migration, Throwable t) {
    Throwable cause = t.getCause();
    boolean memberLeft = t instanceof MemberLeftException
            || cause instanceof TargetNotMemberException
            || cause instanceof HazelcastInstanceNotActiveException;
    if (!memberLeft) {
        logger.severe("Migration commit to " + destination + " failed for " + migration, t);
        return;
    }
    if (node.getThisAddress().equals(destination)) {
        logger.fine("Migration commit failed for " + migration
                + " since this node is shutting down.");
        return;
    }
    logger.warning("Migration commit failed for " + migration
            + " since destination " + destination + " left the cluster");
}
/**
 * Adds the migration to the set of completed migrations and increases the completed migration counter.
 * Acquires the partition service lock to update the migrations.
 *
 * @param migrationInfo the completed migration
 * @return {@code true} if the migration has been added or {@code false} if this migration is already in the completed set
 * @throws IllegalArgumentException if the migration is not completed
 */
boolean addCompletedMigration(MigrationInfo migrationInfo) {
    MigrationStatus status = migrationInfo.getStatus();
    boolean completed = status == MigrationStatus.SUCCESS || status == MigrationStatus.FAILED;
    if (!completed) {
        throw new IllegalArgumentException("Migration doesn't seem completed: " + migrationInfo);
    }
    partitionServiceLock.lock();
    try {
        if (!completedMigrations.add(migrationInfo)) {
            return false;
        }
        completedMigrationCounter.incrementAndGet();
        return true;
    } finally {
        partitionServiceLock.unlock();
    }
}
/** Retains only the {@code migrations} in the completed migration list. Acquires the partition service lock. */
void retainCompletedMigrations(Collection<MigrationInfo> migrations) {
partitionServiceLock.lock();
try {
completedMigrations.retainAll(migrations);
} finally {
partitionServiceLock.unlock();
}
}
/**
 * Removes all completed migrations up to and including the given {@code currentMigration},
 * relying on the insertion order of the completed-migrations set.
 *
 * @param currentMigration the migration up to which migrations will be evicted
 */
private void evictCompletedMigrations(MigrationInfo currentMigration) {
    partitionServiceLock.lock();
    try {
        assert completedMigrations.contains(currentMigration) : currentMigration + " to evict is not in completed migrations";
        for (Iterator<MigrationInfo> it = completedMigrations.iterator(); it.hasNext(); ) {
            MigrationInfo completed = it.next();
            it.remove();
            // stop once the current migration itself has been evicted
            if (completed.equals(currentMigration)) {
                break;
            }
        }
    } finally {
        partitionServiceLock.unlock();
    }
}
/** Clears the migration queue and triggers the control task. Called on the master node. */
void triggerControlTask() {
    migrationQueue.clear();
    if (node.getClusterService().isJoined()) {
        if (node.isMaster()) {
            migrationQueue.add(new ControlTask());
            if (logger.isFinestEnabled()) {
                logger.finest("Migration queue is cleared and control task is scheduled");
            }
        } else {
            logger.fine("Node is not master, will not trigger ControlTask");
        }
    } else {
        logger.fine("Node is not joined, will not trigger ControlTask");
    }
}
/** Returns the currently installed internal migration listener. */
InternalMigrationListener getInternalMigrationListener() {
return internalMigrationListener;
}
/**
 * Installs an internal migration listener.
 *
 * @param listener the listener to install; must not be {@code null}
 */
void setInternalMigrationListener(InternalMigrationListener listener) {
Preconditions.checkNotNull(listener);
internalMigrationListener = listener;
}
/** Replaces the installed internal migration listener with a no-op listener. */
void resetInternalMigrationListener() {
internalMigrationListener = new InternalMigrationListener.NopInternalMigrationListener();
}
/**
 * Handles a graceful-shutdown request from {@code address}. When the partition table is not
 * initialized, or the cluster state forbids migrations (and is not in transition), the member
 * may shut down immediately and a shutdown response is sent right away. Otherwise the address
 * is recorded and the control task is triggered so its replicas can be migrated away first.
 */
void onShutdownRequest(Address address) {
    if (partitionStateManager.isInitialized()) {
        ClusterState clusterState = node.getClusterService().getClusterState();
        if (clusterState.isMigrationAllowed() || clusterState == ClusterState.IN_TRANSITION) {
            if (shutdownRequestedAddresses.add(address)) {
                logger.info("Shutdown request of " + address + " is handled");
                triggerControlTask();
            }
            return;
        }
    }
    sendShutdownOperation(address);
}
/**
 * Reacts to a member leaving the cluster: drops any pending shutdown request for it and, if the
 * departed member was a participant of the active migration, marks that migration INVALID.
 */
void onMemberRemove(MemberImpl member) {
    Address deadAddress = member.getAddress();
    shutdownRequestedAddresses.remove(deadAddress);
    MigrationInfo activeMigration = activeMigrationInfo;
    if (activeMigration == null) {
        return;
    }
    boolean participant = deadAddress.equals(activeMigration.getSource())
            || deadAddress.equals(activeMigration.getDestination());
    if (participant) {
        activeMigration.setStatus(MigrationStatus.INVALID);
    }
}
/** Enqueues the given task on the migration queue for execution on the migration thread. */
void schedule(MigrationRunnable runnable) {
migrationQueue.add(runnable);
}
/** Returns a copy of the list of completed migrations. Runs under the partition service lock. */
List<MigrationInfo> getCompletedMigrationsCopy() {
partitionServiceLock.lock();
try {
return new ArrayList<MigrationInfo>(completedMigrations);
} finally {
partitionServiceLock.unlock();
}
}
/** Returns {@code true} if there is an active migration or the queue still holds migration tasks. */
boolean hasOnGoingMigration() {
return activeMigrationInfo != null || migrationQueue.hasMigrationTasks();
}
/** Returns the number of migration tasks currently waiting in the migration queue. */
int getMigrationQueueSize() {
return migrationQueue.migrationTaskCount();
}
/** Clears all migration state: queued tasks, the active migration and the completed-migrations set. */
void reset() {
migrationQueue.clear();
activeMigrationInfo = null;
completedMigrations.clear();
}
/** Starts the migration thread. */
void start() {
migrationThread.start();
}
/** Stops the migration thread immediately. */
void stop() {
migrationThread.stopNow();
}
/** Schedules a migration by adding it to the migration queue. */
void scheduleMigration(MigrationInfo migrationInfo) {
migrationQueue.add(new MigrateTask(migrationInfo));
}
/**
 * Mutates the partition state and applies the migration: the source is removed from its old
 * replica index, the destination is removed from its old index and installed at its new index,
 * and the source is re-installed at its new index when it keeps a (different) replica.
 */
void applyMigration(InternalPartitionImpl partition, MigrationInfo migrationInfo) {
    Address[] replicas = Arrays.copyOf(partition.getReplicaAddresses(), InternalPartition.MAX_REPLICA_COUNT);
    int sourceCurrent = migrationInfo.getSourceCurrentReplicaIndex();
    if (sourceCurrent > -1) {
        replicas[sourceCurrent] = null;
    }
    int destinationCurrent = migrationInfo.getDestinationCurrentReplicaIndex();
    if (destinationCurrent > -1) {
        replicas[destinationCurrent] = null;
    }
    replicas[migrationInfo.getDestinationNewReplicaIndex()] = migrationInfo.getDestination();
    int sourceNew = migrationInfo.getSourceNewReplicaIndex();
    if (sourceNew > -1) {
        replicas[sourceNew] = migrationInfo.getSource();
    }
    partition.setReplicaAddresses(replicas);
}
/**
 * Returns the addresses which have requested a graceful shutdown.
 * NOTE(review): this exposes the internal mutable set directly, not a copy — callers are
 * presumably expected to only read it under the partition service lock; verify against callers.
 */
Set<Address> getShutdownRequestedAddresses() {
return shutdownRequestedAddresses;
}
/** Sends a {@link ShutdownResponseOperation} to the {@code address} or takes a shortcut if shutdown is local. */
private void sendShutdownOperation(Address address) {
if (node.getThisAddress().equals(address)) {
assert !node.isRunning() : "Node state: " + node.getState();
// local shutdown: notify the partition service directly instead of sending an operation
partitionService.onShutdownResponse();
} else {
nodeEngine.getOperationService().send(new ShutdownResponseOperation(), address);
}
}
/** Returns the task currently being executed by the migration thread, if any. */
MigrationRunnable getActiveTask() {
return migrationThread.getActiveTask();
}
/** Resolves the cluster member UUID for {@code address}, or {@link #INVALID_UUID} if it is not a member. */
private String getMemberUuid(Address address) {
    MemberImpl member = node.getClusterService().getMember(address);
    if (member == null) {
        return INVALID_UUID;
    }
    return member.getUuid();
}
/**
 * Invoked on the master node. Rearranges the partition table if there is no recent activity in the cluster after
 * this task has been scheduled, schedules migrations and syncs the partition state.
 * Also schedules a {@link ProcessShutdownRequestsTask}. Acquires partition service lock.
 */
private class RepartitioningTask implements MigrationRunnable {
    @Override
    public void run() {
        if (!node.isMaster()) {
            return;
        }
        partitionServiceLock.lock();
        try {
            Address[][] newState = repartition();
            if (newState == null) {
                return;
            }
            lastRepartitionTime.set(Clock.currentTimeMillis());
            processNewPartitionState(newState);
            if (ASSERTION_ENABLED) {
                migrationQueue.add(new AssertPartitionTableTask(partitionService.getMaxAllowedBackupCount()));
            }
            migrationQueue.add(new ProcessShutdownRequestsTask());
            partitionService.syncPartitionRuntimeState();
        } finally {
            partitionServiceLock.unlock();
        }
    }

    /**
     * Rearranges the partition table if the cluster is stable, returns the new partition table and schedules a
     * {@link ProcessShutdownRequestsTask} if the repartitioning failed.
     *
     * @return the new partition table or {@code null} if the cluster is not stable or the repartitioning failed
     */
    private Address[][] repartition() {
        if (!isRepartitioningAllowed()) {
            return null;
        }
        Address[][] newState = partitionStateManager.repartition(shutdownRequestedAddresses);
        if (newState == null) {
            migrationQueue.add(new ProcessShutdownRequestsTask());
            return null;
        }
        // re-check: cluster conditions may have changed while computing the new layout
        if (!isRepartitioningAllowed()) {
            return null;
        }
        return newState;
    }

    /** Processes the new partition state by planning and scheduling migrations. */
    private void processNewPartitionState(Address[][] newState) {
        final MutableInteger lostCount = new MutableInteger();
        final MutableInteger migrationCount = new MutableInteger();
        final List<Queue<MigrationInfo>> migrations = new ArrayList<Queue<MigrationInfo>>(newState.length);
        for (int partitionId = 0; partitionId < newState.length; partitionId++) {
            InternalPartitionImpl currentPartition = partitionStateManager.getPartitionImpl(partitionId);
            Address[] currentReplicas = currentPartition.getReplicaAddresses();
            Address[] newReplicas = newState[partitionId];
            MigrationCollector migrationCollector = new MigrationCollector(currentPartition, migrationCount, lostCount);
            if (logger.isFinestEnabled()) {
                logger.finest("Planning migrations for partitionId=" + partitionId
                        + ". Current replicas: " + Arrays.toString(currentReplicas)
                        + ", New replicas: " + Arrays.toString(newReplicas));
            }
            migrationPlanner.planMigrations(currentReplicas, newReplicas, migrationCollector);
            migrationPlanner.prioritizeCopiesAndShiftUps(migrationCollector.migrations);
            migrations.add(migrationCollector.migrations);
        }
        scheduleMigrations(migrations);
        logMigrationStatistics(migrationCount.value, lostCount.value);
    }

    /** Schedules all migrations, interleaving them round-robin across partitions. */
    private void scheduleMigrations(List<Queue<MigrationInfo>> migrations) {
        boolean migrationScheduled;
        do {
            migrationScheduled = false;
            for (Queue<MigrationInfo> queue : migrations) {
                MigrationInfo migration = queue.poll();
                if (migration != null) {
                    migrationScheduled = true;
                    scheduleMigration(migration);
                }
            }
        } while (migrationScheduled);
    }

    /** Logs a summary of the planned migrations and of partitions which had to be re-assigned as LOST. */
    private void logMigrationStatistics(int migrationCount, int lostCount) {
        if (lostCount > 0) {
            logger.warning("Assigning new owners for " + lostCount + " LOST partitions!");
        }
        if (migrationCount > 0) {
            logger.info("Re-partitioning cluster data... Migration queue size: " + migrationCount);
        } else {
            logger.info("Partition balance is ok, no need to re-partition cluster data... ");
        }
    }

    /** Assigns a new owner to a lost partition directly (no data to migrate) and fires migration events. */
    private void assignNewPartitionOwner(int partitionId, InternalPartitionImpl currentPartition, Address newOwner) {
        String destinationUuid = getMemberUuid(newOwner);
        MigrationInfo migrationInfo = new MigrationInfo(partitionId, null, null, newOwner, destinationUuid, -1, -1, -1, 0);
        PartitionEventManager partitionEventManager = partitionService.getPartitionEventManager();
        partitionEventManager.sendMigrationEvent(migrationInfo, MigrationEvent.MigrationStatus.STARTED);
        currentPartition.setReplicaAddress(0, newOwner);
        partitionEventManager.sendMigrationEvent(migrationInfo, MigrationEvent.MigrationStatus.COMPLETED);
    }

    /**
     * Returns {@code true} if there are no migrations in the migration queue, no new node is joining, there is no
     * ongoing repartitioning and the cluster state allows migrations, {@link ClusterState#isMigrationAllowed()},
     * otherwise triggers the control task.
     */
    private boolean isRepartitioningAllowed() {
        if (!doesClusterStateAllowsMigration()) {
            logger.finest("Cluster state doesn't allow repartitioning. RepartitioningTask will stop.");
            return false;
        }
        boolean migrationAllowed = isMigrationAllowed();
        // > 1 because this RepartitioningTask itself may be counted among the queued tasks
        boolean hasMigrationTasks = migrationQueue.migrationTaskCount() > 1;
        if (migrationAllowed && !hasMigrationTasks) {
            return true;
        }
        triggerControlTask();
        return false;
    }

    /** Returns {@code true} if the current cluster state permits migrations. */
    private boolean doesClusterStateAllowsMigration() {
        ClusterState clusterState = node.getClusterService().getClusterState();
        return clusterState.isMigrationAllowed();
    }

    /**
     * Collects the migrations planned by the {@link MigrationPlanner} for a single partition,
     * applying lost-partition assignments and plain replica removals immediately.
     */
    private class MigrationCollector implements MigrationDecisionCallback {
        private final int partitionId;
        private final InternalPartitionImpl partition;
        private final MutableInteger migrationCount;
        private final MutableInteger lostCount;
        private final LinkedList<MigrationInfo> migrations = new LinkedList<MigrationInfo>();

        MigrationCollector(InternalPartitionImpl partition, MutableInteger migrationCount, MutableInteger lostCount) {
            partitionId = partition.getPartitionId();
            this.partition = partition;
            this.migrationCount = migrationCount;
            this.lostCount = lostCount;
        }

        @Override
        public void migrate(Address source, int sourceCurrentReplicaIndex, int sourceNewReplicaIndex,
                Address destination, int destinationCurrentReplicaIndex, int destinationNewReplicaIndex) {
            if (logger.isFineEnabled()) {
                logger.fine("Planned migration -> partitionId=" + partitionId
                        + ", source=" + source + ", sourceCurrentReplicaIndex=" + sourceCurrentReplicaIndex
                        + ", sourceNewReplicaIndex=" + sourceNewReplicaIndex + ", destination=" + destination
                        + ", destinationCurrentReplicaIndex=" + destinationCurrentReplicaIndex
                        + ", destinationNewReplicaIndex=" + destinationNewReplicaIndex);
            }
            if (source == null && destinationCurrentReplicaIndex == -1 && destinationNewReplicaIndex == 0) {
                // partition is LOST: assign a brand-new owner without migrating any data
                assert destination != null : "partitionId=" + partitionId + " destination is null";
                assert sourceCurrentReplicaIndex == -1
                        : "partitionId=" + partitionId + " invalid index: " + sourceCurrentReplicaIndex;
                assert sourceNewReplicaIndex == -1
                        : "partitionId=" + partitionId + " invalid index: " + sourceNewReplicaIndex;
                lostCount.value++;
                assignNewPartitionOwner(partitionId, partition, destination);
            } else if (destination == null && sourceNewReplicaIndex == -1) {
                // replica is simply dropped from the source; no destination to migrate to
                assert source != null : "partitionId=" + partitionId + " source is null";
                assert sourceCurrentReplicaIndex != -1
                        : "partitionId=" + partitionId + " invalid index: " + sourceCurrentReplicaIndex;
                assert sourceCurrentReplicaIndex != 0
                        : "partitionId=" + partitionId + " invalid index: " + sourceCurrentReplicaIndex;
                final Address currentSource = partition.getReplicaAddress(sourceCurrentReplicaIndex);
                // BUGFIX: the message previously printed 'source' twice; it now shows the actual current replica
                assert source.equals(currentSource)
                        : "partitionId=" + partitionId + " current source="
                        + currentSource + " is different than expected source=" + source;
                partition.setReplicaAddress(sourceCurrentReplicaIndex, null);
            } else {
                // a real migration: record it for scheduling
                String sourceUuid = getMemberUuid(source);
                String destinationUuid = getMemberUuid(destination);
                MigrationInfo migration = new MigrationInfo(partitionId, source, sourceUuid, destination, destinationUuid,
                        sourceCurrentReplicaIndex, sourceNewReplicaIndex,
                        destinationCurrentReplicaIndex, destinationNewReplicaIndex);
                migrationCount.value++;
                migrations.add(migration);
            }
        }
    }
}
/**
 * Assertion task for checking the consistency of the partition table. Invoked on the master node to assert if the
 * partition table is :
 * <ul>
 * <li>missing some replicas (the address is {@code null} but there are no nodes currently shutting down)</li>
 * <li>has more than the maximum configured replica count</li>
 * <li>has duplicate addresses in the same partition</li>
 * </ul>
 * Acquires partition service lock.
 */
@SuppressWarnings({"checkstyle:npathcomplexity"})
private final class AssertPartitionTableTask implements MigrationRunnable {
// maximum configured backup count at the time this task was scheduled
final int maxBackupCount;
private AssertPartitionTableTask(int maxBackupCount) {
this.maxBackupCount = maxBackupCount;
}
@Override
public void run() {
// all checks in this task are plain asserts; without -ea there is nothing to do
if (!ASSERTION_ENABLED) {
return;
}
if (!node.isMaster()) {
return;
}
partitionServiceLock.lock();
try {
if (!partitionStateManager.isInitialized()) {
logger.info("Skipping partition table assertions since partition table state is reset");
return;
}
final InternalPartition[] partitions = partitionStateManager.getPartitions();
// reused per-partition to detect duplicate replica addresses
final Set<Address> replicas = new HashSet<Address>();
for (InternalPartition partition : partitions) {
replicas.clear();
for (int index = 0; index < InternalPartition.MAX_REPLICA_COUNT; index++) {
final Address address = partition.getReplicaAddress(index);
if (index <= maxBackupCount) {
// replicas within the configured backup range must be assigned,
// unless some members are currently shutting down
if (shutdownRequestedAddresses.isEmpty()) {
assert address != null : "Repartitioning problem, missing replica! "
+ "Current replica: " + index + ", Max backups: " + maxBackupCount
+ " -> " + partition;
}
} else {
// replicas beyond the configured backup range must be empty
assert address == null : "Repartitioning problem, leaking replica! "
+ "Current replica: " + index + ", Max backups: " + maxBackupCount
+ " -> " + partition;
}
if (address != null) {
assert replicas.add(address) : "Duplicate address in " + partition;
}
}
}
} finally {
partitionServiceLock.unlock();
}
}
}
/**
* Invoked on the master node to migrate a partition (not including promotions). It will execute the
* {@link MigrationRequestOperation} on the partition owner.
*/
class MigrateTask implements MigrationRunnable {
final MigrationInfo migrationInfo;
/**
 * Creates a task for the given migration and stamps this (master) node's address on it,
 * so participants can validate that the request originates from the current master.
 */
MigrateTask(MigrationInfo migrationInfo) {
this.migrationInfo = migrationInfo;
migrationInfo.setMaster(node.getThisAddress());
}
@Override
public void run() {
// only the master drives migrations; a demoted master silently drops the task
if (!node.isMaster()) {
return;
}
// promotions (destination already holds a backup and becomes owner) must never reach this task
if (migrationInfo.getSource() == null
&& migrationInfo.getDestinationCurrentReplicaIndex() > 0
&& migrationInfo.getDestinationNewReplicaIndex() == 0) {
throw new AssertionError("Promotion migrations should be handled by "
+ RepairPartitionTableTask.class.getSimpleName() + "! -> " + migrationInfo);
}
try {
MemberImpl partitionOwner = checkMigrationParticipantsAndGetPartitionOwner();
if (partitionOwner == null) {
return;
}
beforeMigration();
Boolean result = executeMigrateOperation(partitionOwner);
processMigrationResult(result);
} catch (Throwable t) {
// invalid migrations (e.g. a participant left) fail expectedly, so log them quietly
final Level level = migrationInfo.isValid() ? Level.WARNING : Level.FINE;
logger.log(level, "Error [" + t.getClass() + ": " + t.getMessage() + "] during " + migrationInfo);
logger.finest(t);
migrationOperationFailed();
}
}
/** Notifies the internal listener (as MASTER participant) and sends a STARTED migration event to listeners. */
private void beforeMigration() {
internalMigrationListener.onMigrationStart(MigrationParticipant.MASTER, migrationInfo);
partitionService.getPartitionEventManager()
.sendMigrationEvent(migrationInfo, MigrationEvent.MigrationStatus.STARTED);
if (logger.isFineEnabled()) {
logger.fine("Starting Migration: " + migrationInfo);
}
}
/**
* Checks if the partition owner is not {@code null}, the source and destinations are still members and returns the owner.
* Returns {@code null} and reschedules the {@link ControlTask} if the checks failed.
*/
private MemberImpl checkMigrationParticipantsAndGetPartitionOwner() {
MemberImpl partitionOwner = getPartitionOwner();
if (partitionOwner == null) {
logger.fine("Partition owner is null. Ignoring " + migrationInfo);
triggerRepartitioningAfterMigrationFailure();
return null;
}
if (migrationInfo.getSource() != null) {
if (node.getClusterService().getMember(migrationInfo.getSource()) == null) {
logger.fine("Source is not member anymore. Ignoring " + migrationInfo);
triggerRepartitioningAfterMigrationFailure();
return null;
}
}
if (node.getClusterService().getMember(migrationInfo.getDestination()) == null) {
logger.fine("Destination is not member anymore. Ignoring " + migrationInfo);
triggerRepartitioningAfterMigrationFailure();
return null;
}
return partitionOwner;
}
/** Returns the partition owner or {@code null} if it is not set. */
private MemberImpl getPartitionOwner() {
InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(migrationInfo.getPartitionId());
Address owner = partition.getOwnerOrNull();
if (owner == null) {
if (migrationInfo.isValid()) {
logger.severe("Skipping migration! Partition owner is not set! -> partitionId="
+ migrationInfo.getPartitionId()
+ ", " + partition + " -VS- " + migrationInfo);
}
return null;
}
return node.getClusterService().getMember(owner);
}
/** Completes the partition migration. The migration was successful if the {@code result} is {@link Boolean#TRUE}. */
private void processMigrationResult(Boolean result) {
if (Boolean.TRUE.equals(result)) {
if (logger.isFineEnabled()) {
logger.fine("Finished Migration: " + migrationInfo);
}
migrationOperationSucceeded();
} else {
Level level = nodeEngine.isRunning() && migrationInfo.isValid() ? Level.WARNING : Level.FINE;
if (logger.isLoggable(level)) {
logger.log(level, "Migration failed: " + migrationInfo);
}
migrationOperationFailed();
}
}
/**
* Sends a {@link MigrationRequestOperation} to the {@code fromMember} and returns the migration result if the
* migration was successful.
*/
private Boolean executeMigrateOperation(MemberImpl fromMember) {
int partitionStateVersion = partitionService.getPartitionStateVersion();
Version clusterVersion = node.getClusterService().getClusterVersion();
Operation migrationRequestOp = clusterVersion.isGreaterOrEqual(Versions.V3_9)
? new MigrationRequestOperation(migrationInfo, partitionStateVersion, fragmentedMigrationEnabled)
: new LegacyMigrationRequestOperation(migrationInfo, partitionStateVersion);
Future future = nodeEngine.getOperationService().createInvocationBuilder(SERVICE_NAME, migrationRequestOp,
fromMember.getAddress())
.setCallTimeout(partitionMigrationTimeout)
.setTryCount(InternalPartitionService.MIGRATION_RETRY_COUNT)
.setTryPauseMillis(InternalPartitionService.MIGRATION_RETRY_PAUSE).invoke();
try {
Object response = future.get();
return (Boolean) nodeEngine.toObject(response);
} catch (Throwable e) {
Level level = nodeEngine.isRunning() && migrationInfo.isValid() ? Level.WARNING : Level.FINE;
if (e instanceof ExecutionException && e.getCause() instanceof PartitionStateVersionMismatchException) {
level = Level.FINE;
}
if (logger.isLoggable(level)) {
logger.log(level, "Failed migration from " + fromMember + " for " + migrationInfo, e);
}
}
return Boolean.FALSE;
}
/**
* Called on the master node to complete the migration and notify the migration listeners that the migration completed.
* It will :
* <ul>
* <li>set the migration status</li>
* <li>update the completed migration list</li>
* <li>schedule the migration for finalization</li>
* <li>update the local partition state version</li>
* <li>sync the partition state with cluster members</li>
* <li>triggers the {@link ControlTask}</li>
* <li>publishes a {@link MigrationEvent}</li>
* </ul>
* <p>
* Acquires the partition state lock.
*/
private void migrationOperationFailed() {
migrationInfo.setStatus(MigrationStatus.FAILED);
internalMigrationListener.onMigrationComplete(MigrationParticipant.MASTER, migrationInfo, false);
partitionServiceLock.lock();
try {
addCompletedMigration(migrationInfo);
internalMigrationListener.onMigrationRollback(MigrationParticipant.MASTER, migrationInfo);
scheduleActiveMigrationFinalization(migrationInfo);
int delta = PARTITION_STATE_VERSION_INCREMENT_DELTA_ON_MIGRATION_FAILURE;
partitionService.getPartitionStateManager().incrementVersion(delta);
node.getNodeExtension().onPartitionStateChange();
if (partitionService.syncPartitionRuntimeState()) {
evictCompletedMigrations(migrationInfo);
}
triggerRepartitioningAfterMigrationFailure();
} finally {
partitionServiceLock.unlock();
}
partitionService.getPartitionEventManager().sendMigrationEvent(migrationInfo, MigrationEvent.MigrationStatus.FAILED);
}
/** Waits for some time and rerun the {@link ControlTask}. */
private void triggerRepartitioningAfterMigrationFailure() {
// Migration failed.
// Pause migration process for a small amount of time, if a migration attempt is failed.
// Otherwise, migration failures can do a busy spin until migration problem is resolved.
// Migration can fail either a node's just joined and not completed start yet or it's just left the cluster.
// Re-execute RepartitioningTask when all other migration tasks are done,
// an imbalance may occur because of this failure.
partitionServiceLock.lock();
try {
pauseMigration();
triggerControlTask();
resumeMigrationEventually();
} finally {
partitionServiceLock.unlock();
}
}
/**
* Called on the master node to complete the migration and notify the migration listeners that the migration completed.
* It will :
* <ul>
* <li>commit the migration on the destination</li>
* <li>set the migration status</li>
* <li>update the local partition state</li>
* <li>schedule the migration for finalization</li>
* <li>sync the partition state with cluster members</li>
* <li>update the completed migration list</li>
* <li>publishes a {@link MigrationEvent}</li>
* </ul>
* <p>
* Triggers the {@link ControlTask} if the migration failed. Acquires the partition state lock to process the result
* of the migration commit.
*/
private void migrationOperationSucceeded() {
internalMigrationListener.onMigrationComplete(MigrationParticipant.MASTER, migrationInfo, true);
boolean commitSuccessful = commitMigrationToDestination(migrationInfo.getDestination(), migrationInfo);
partitionServiceLock.lock();
try {
if (commitSuccessful) {
migrationInfo.setStatus(MigrationStatus.SUCCESS);
internalMigrationListener.onMigrationCommit(MigrationParticipant.MASTER, migrationInfo);
// updates partition table after successful commit
InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(migrationInfo.getPartitionId());
applyMigration(partition, migrationInfo);
} else {
migrationInfo.setStatus(MigrationStatus.FAILED);
internalMigrationListener.onMigrationRollback(MigrationParticipant.MASTER, migrationInfo);
int delta = PARTITION_STATE_VERSION_INCREMENT_DELTA_ON_MIGRATION_FAILURE;
partitionService.getPartitionStateManager().incrementVersion(delta);
triggerRepartitioningAfterMigrationFailure();
}
addCompletedMigration(migrationInfo);
scheduleActiveMigrationFinalization(migrationInfo);
node.getNodeExtension().onPartitionStateChange();
if (partitionService.syncPartitionRuntimeState()) {
evictCompletedMigrations(migrationInfo);
}
} finally {
partitionServiceLock.unlock();
}
PartitionEventManager partitionEventManager = partitionService.getPartitionEventManager();
partitionEventManager.sendMigrationEvent(migrationInfo, MigrationEvent.MigrationStatus.COMPLETED);
}
@Override
public String toString() {
return getClass().getSimpleName() + "{" + "migrationInfo=" + migrationInfo + '}';
}
}
/**
 * Checks if the partition table needs repairing once the partitions have been initialized (assigned).
 * This means that it will:
 * <ul>
 * <li>Remove unknown addresses from the partition table</li>
 * <li>Promote the partition replicas if necessary (the partition owner is missing)</li>
 * </ul>
 * If the promotions are successful, schedules the {@link RepartitioningTask}. If the process was not successful
 * it will trigger a {@link ControlTask} to restart the partition table repair process.
 * <p>
 * Invoked on the master node. Acquires partition service lock when scheduling the tasks on the migration queue.
 */
private class RepairPartitionTableTask implements MigrationRunnable {
    @Override
    public void run() {
        // nothing to repair before the first partition assignment
        if (!partitionStateManager.isInitialized()) {
            return;
        }
        Map<Address, Collection<MigrationInfo>> promotions = removeUnknownAddressesAndCollectPromotions();
        boolean success = promoteBackupsForMissingOwners(promotions);
        partitionServiceLock.lock();
        try {
            if (success) {
                if (logger.isFinestEnabled()) {
                    logger.finest("RepartitioningTask scheduled");
                }
                migrationQueue.add(new RepartitioningTask());
            } else {
                // some promotion failed -> restart the whole repair process
                triggerControlTask();
            }
        } finally {
            partitionServiceLock.unlock();
        }
    }

    /**
     * Removes addresses from the partition table which are not registered as cluster members and checks
     * if any partitions need promotion (partition owners are missing).
     * Invoked on the master node. Acquires partition service lock.
     *
     * @return promotions that need to be sent, grouped by target address
     */
    private Map<Address, Collection<MigrationInfo>> removeUnknownAddressesAndCollectPromotions() {
        partitionServiceLock.lock();
        try {
            partitionStateManager.removeUnknownAddresses();
            Map<Address, Collection<MigrationInfo>> promotions = new HashMap<Address, Collection<MigrationInfo>>();
            for (int partitionId = 0; partitionId < partitionService.getPartitionCount(); partitionId++) {
                MigrationInfo migration = createPromotionMigrationIfOwnerIsNull(partitionId);
                if (migration == null) {
                    continue;
                }
                // group promotions by destination so each member gets a single commit
                Collection<MigrationInfo> migrations = promotions.get(migration.getDestination());
                if (migrations == null) {
                    migrations = new ArrayList<MigrationInfo>();
                    promotions.put(migration.getDestination(), migrations);
                }
                migrations.add(migration);
            }
            return promotions;
        } finally {
            partitionServiceLock.unlock();
        }
    }

    /**
     * Sends promotions to the destinations and commits if the destinations successfully process these promotions.
     * Called on the master node.
     *
     * @param promotions the promotions that need to be sent, grouped by target address
     * @return if all promotions were successful
     */
    private boolean promoteBackupsForMissingOwners(Map<Address, Collection<MigrationInfo>> promotions) {
        boolean allSucceeded = true;
        for (Map.Entry<Address, Collection<MigrationInfo>> entry : promotions.entrySet()) {
            Address destination = entry.getKey();
            Collection<MigrationInfo> migrations = entry.getValue();
            allSucceeded &= commitPromotionMigrations(destination, migrations);
        }
        return allSucceeded;
    }

    /**
     * Sends promotions to the destination and commits the {@code migrations} if successful. Called on the master node.
     *
     * @param destination the promotion destination
     * @param migrations the promotion migrations
     * @return if the promotions were successful
     */
    private boolean commitPromotionMigrations(Address destination, Collection<MigrationInfo> migrations) {
        boolean success = commitPromotionsToDestination(destination, migrations);
        boolean local = node.getThisAddress().equals(destination);
        // NOTE(review): when the destination is this node the commit operation presumably already applied
        // the table changes locally, so the result processing is skipped -- verify against PromotionCommitOperation
        if (!local) {
            processPromotionCommitResult(destination, migrations, success);
        }
        partitionService.syncPartitionRuntimeState();
        return success;
    }

    /**
     * Applies the {@code migrations} to the local partition table if {@code success} is {@code true}.
     * In any case it will increase the partition state version.
     * Called on the master node. This method will acquire the partition service lock.
     *
     * @param destination the promotion destination
     * @param migrations the promotions for the destination
     * @param success if the {@link PromotionCommitOperation} were successfully processed by the {@code destination}
     */
    private void processPromotionCommitResult(Address destination, Collection<MigrationInfo> migrations,
            boolean success) {
        partitionServiceLock.lock();
        try {
            if (!partitionStateManager.isInitialized()) {
                // node reset/terminated while running task
                return;
            }
            if (success) {
                for (MigrationInfo migration : migrations) {
                    InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(migration.getPartitionId());
                    assert partition.getOwnerOrNull() == null : "Owner should be null: " + partition;
                    assert destination.equals(partition.getReplicaAddress(migration.getDestinationCurrentReplicaIndex()))
                            : "Invalid replica! Destination: " + destination + ", index: "
                            + migration.getDestinationCurrentReplicaIndex() + ", " + partition;
                    // single partition update increments partition state version by 1
                    partition.swapAddresses(0, migration.getDestinationCurrentReplicaIndex());
                }
            } else {
                // on failure bump the version by one more than the successful path would have
                // (one increment per migration), so members detect the stale state and re-sync
                int delta = migrations.size() + 1;
                partitionService.getPartitionStateManager().incrementVersion(delta);
            }
        } finally {
            partitionServiceLock.unlock();
        }
    }

    /**
     * Constructs a promotion migration if the partition owner is {@code null} and there exists a non-{@code null} replica.
     * If there are no other replicas, it will send an {@link IPartitionLostEvent}.
     *
     * @param partitionId the partition ID to check
     * @return the migration info or {@code null} if the partition owner is assigned or no replica can be promoted
     */
    private MigrationInfo createPromotionMigrationIfOwnerIsNull(int partitionId) {
        InternalPartitionImpl partition = partitionStateManager.getPartitionImpl(partitionId);
        if (partition.getOwnerOrNull() == null) {
            // find the first non-null backup replica; it becomes the promotion destination
            Address destination = null;
            int index = 1;
            for (int i = index; i < InternalPartition.MAX_REPLICA_COUNT; i++) {
                destination = partition.getReplicaAddress(i);
                if (destination != null) {
                    index = i;
                    break;
                }
            }
            if (logger.isFinestEnabled()) {
                if (destination != null) {
                    logger.finest("partitionId=" + partition.getPartitionId() + " owner is removed. replicaIndex=" + index
                            + " will be shifted up to 0. " + partition);
                } else {
                    logger.finest("partitionId=" + partition.getPartitionId()
                            + " owner is removed. there is no other replica to shift up. " + partition);
                }
            }
            if (destination != null) {
                String destinationUuid = getMemberUuid(destination);
                // source is null and destination moves from `index` to 0: the promotion shape
                // that MigrateTask explicitly rejects
                MigrationInfo migration =
                        new MigrationInfo(partitionId, null, null, destination, destinationUuid, -1, -1, index, 0);
                migration.setMaster(node.getThisAddress());
                migration.setStatus(MigrationInfo.MigrationStatus.SUCCESS);
                return migration;
            }
        }
        // still no owner here means no replica was available to promote: the partition is lost
        if (partition.getOwnerOrNull() == null) {
            logger.warning("partitionId=" + partitionId + " is completely lost!");
            PartitionEventManager partitionEventManager = partitionService.getPartitionEventManager();
            partitionEventManager.sendPartitionLostEvent(partitionId, InternalPartition.MAX_BACKUP_COUNT);
        }
        return null;
    }

    /**
     * Creates a new partition table by applying the {@code migrations} and send them via {@link PromotionCommitOperation}
     * to the destination.
     *
     * @return true if the promotions were applied on the destination
     */
    private boolean commitPromotionsToDestination(Address destination, Collection<MigrationInfo> migrations) {
        assert migrations.size() > 0 : "No promotions to commit! destination=" + destination;
        MemberImpl member = node.getClusterService().getMember(destination);
        if (member == null) {
            logger.warning("Destination " + destination + " is not member anymore");
            return false;
        }
        try {
            if (logger.isFinestEnabled()) {
                logger.finest("Sending commit operation to " + destination + " for " + migrations);
            }
            PartitionRuntimeState partitionState = partitionService.createPromotionCommitPartitionState(migrations);
            String destinationUuid = member.getUuid();
            PromotionCommitOperation op = new PromotionCommitOperation(partitionState, migrations, destinationUuid);
            // promotions must not be abandoned: retry indefinitely and never time out the call
            Future<Boolean> future = nodeEngine.getOperationService()
                    .createInvocationBuilder(SERVICE_NAME, op, destination)
                    .setTryCount(Integer.MAX_VALUE)
                    .setCallTimeout(Long.MAX_VALUE).invoke();
            boolean result = future.get();
            if (logger.isFinestEnabled()) {
                logger.finest("Promotion commit result " + result + " from " + destination
                        + " for migrations " + migrations);
            }
            return result;
        } catch (Throwable t) {
            logPromotionCommitFailure(destination, migrations, t);
        }
        return false;
    }

    /** Logs a promotion commit failure, quietly when the destination (or this node) left the cluster. */
    private void logPromotionCommitFailure(Address destination, Collection<MigrationInfo> migrations, Throwable t) {
        boolean memberLeft = t instanceof MemberLeftException
                || t.getCause() instanceof TargetNotMemberException
                || t.getCause() instanceof HazelcastInstanceNotActiveException;
        int migrationsSize = migrations.size();
        if (memberLeft) {
            if (node.getThisAddress().equals(destination)) {
                logger.fine("Promotion commit failed for " + migrationsSize + " migrations"
                        + " since this node is shutting down.");
                return;
            }
            // at finest level include the full migration list, otherwise just the count
            if (logger.isFinestEnabled()) {
                logger.warning("Promotion commit failed for " + migrations
                        + " since destination " + destination + " left the cluster");
            } else {
                logger.warning("Promotion commit failed for "
                        + (migrationsSize == 1 ? migrations.iterator().next() : migrationsSize + " migrations")
                        + " since destination " + destination + " left the cluster");
            }
            return;
        }
        if (logger.isFinestEnabled()) {
            logger.severe("Promotion commit to " + destination + " failed for " + migrations, t);
        } else {
            logger.severe("Promotion commit to " + destination + " failed for "
                    + (migrationsSize == 1 ? migrations.iterator().next() : migrationsSize + " migrations"), t);
        }
    }
}
/**
* Task scheduled on the master node to fetch and repair the latest partition table.
* It will first check if we need to fetch the new partition table and schedule a task to do so, along with a new
* {@link ControlTask} to be executed afterwards. If we don't need to fetch the partition table it will send a
* {@link RepairPartitionTableTask} to repair the existing partition table.
* Invoked on the master node. It will acquire the partition service lock.
*
* @see InternalPartitionServiceImpl#isFetchMostRecentPartitionTableTaskRequired()
*/
private class ControlTask implements MigrationRunnable {
    @Override
    public void run() {
        partitionServiceLock.lock();
        try {
            // drop any pending migration work; the control flow decides what runs next
            migrationQueue.clear();
            boolean fetchScheduled = partitionService.scheduleFetchMostRecentPartitionTableTaskIfRequired();
            if (fetchScheduled) {
                // the fetch task was queued; run the control cycle again once it is done
                if (logger.isFinestEnabled()) {
                    logger.finest("FetchMostRecentPartitionTableTask scheduled");
                }
                migrationQueue.add(new ControlTask());
            } else {
                // partition table is current; go straight to repairing it
                if (logger.isFinestEnabled()) {
                    logger.finest("RepairPartitionTableTask scheduled");
                }
                migrationQueue.add(new RepairPartitionTableTask());
            }
        } finally {
            partitionServiceLock.unlock();
        }
    }
}
/**
* Processes shutdown requests, either for this node or for other members of the cluster. If all members requested
* shutdown it will simply send the shutdown response, otherwise checks if any member is still in the partition table
* and triggers the control task.
* Invoked on the master node. Acquires partition service lock.
*/
private class ProcessShutdownRequestsTask implements MigrationRunnable {
    @Override
    public void run() {
        // shutdown requests are processed only by the master
        if (!node.isMaster()) {
            return;
        }
        partitionServiceLock.lock();
        try {
            int requestCount = shutdownRequestedAddresses.size();
            if (requestCount == 0) {
                return;
            }
            if (requestCount == nodeEngine.getClusterService().getSize(DATA_MEMBER_SELECTOR)) {
                // every data member wants to shut down; just let them all go
                for (Address address : shutdownRequestedAddresses) {
                    sendShutdownOperation(address);
                }
                return;
            }
            // only members already evacuated from the partition table may shut down now
            boolean stillInPartitionTable = false;
            for (Address address : shutdownRequestedAddresses) {
                if (partitionStateManager.isAbsentInPartitionTable(address)) {
                    sendShutdownOperation(address);
                } else {
                    logger.warning(address + " requested to shutdown but still in partition table");
                    stillInPartitionTable = true;
                }
            }
            if (stillInPartitionTable) {
                // re-run the control cycle so remaining members get drained and released later
                triggerControlTask();
            }
        } finally {
            partitionServiceLock.unlock();
        }
    }
}
}