/*
* JBoss, Home of Professional Open Source
* Copyright 2011 Red Hat Inc. and/or its affiliates and other contributors
* as indicated by the @author tags. All rights reserved.
* See the copyright.txt in the distribution for a
* full listing of individual contributors.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License, v. 2.1.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public License,
* v.2.1 along with this distribution; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
package org.infinispan.cacheviews;
import org.infinispan.CacheException;
import org.infinispan.commands.control.CacheViewControlCommand;
import org.infinispan.config.ConfigurationException;
import org.infinispan.config.GlobalConfiguration;
import org.infinispan.configuration.cache.Configuration;
import org.infinispan.factories.annotations.ComponentName;
import org.infinispan.factories.annotations.Inject;
import org.infinispan.factories.annotations.Start;
import org.infinispan.factories.annotations.Stop;
import org.infinispan.manager.EmbeddedCacheManager;
import org.infinispan.notifications.Listener;
import org.infinispan.notifications.cachemanagerlistener.CacheManagerNotifier;
import org.infinispan.notifications.cachemanagerlistener.annotation.Merged;
import org.infinispan.notifications.cachemanagerlistener.annotation.ViewChanged;
import org.infinispan.notifications.cachemanagerlistener.event.ViewChangedEvent;
import org.infinispan.remoting.responses.Response;
import org.infinispan.remoting.responses.SuccessfulResponse;
import org.infinispan.remoting.rpc.ResponseMode;
import org.infinispan.remoting.transport.Address;
import org.infinispan.remoting.transport.Transport;
import org.infinispan.util.concurrent.ConcurrentMapFactory;
import org.infinispan.util.logging.Log;
import org.infinispan.util.logging.LogFactory;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import static org.infinispan.factories.KnownComponentNames.ASYNC_TRANSPORT_EXECUTOR;
/**
* CacheViewsManager implementation.
* <p/>
* It uses {@link org.infinispan.commands.control.CacheViewControlCommand}s to organize the installation of cache views
* as a two-phase (prepare/commit) protocol.
* <p/>
* Installing a cache view involves the following steps:
* <ol>
* <li>A node wants to start or stop the cache, sending a REQUEST_JOIN or a REQUEST_LEAVE.
* A node leaving the JGroups cluster is interpreted as a REQUEST_LEAVE for all its caches.
* The request is broadcast to all the cluster members, as all the nodes need to stop sending requests to the leavers.
* <li>For join requests, the cache views manager waits for a short period of time to allow other members to join.
* <li>The coordinator then sends a PREPARE_VIEW to all the nodes that have the cache started (or starting).
* Any node can veto the view by throwing an exception in this phase.
* <li>The coordinator sends a COMMIT_VIEW to all the nodes that have the cache started.
* <li>If a node threw an exception during PREPARE_VIEW, the coordinator sends a ROLLBACK_VIEW instead.<br>
* After a configurable amount of time the coordinator may retry installing the view, but with a different
* view id (even if the members are the same; this makes the implementation simpler).
* </ol>
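* <p/>
* A minimal sketch of how a component drives this protocol through the public API
* (the cache name and method name are hypothetical; {@code join}, {@code getCommittedView}
* and {@code leave} are the methods implemented below):
* <pre>{@code
* void joinAndLeave(CacheViewsManager viewsManager, CacheViewListener listener) throws Exception {
*    viewsManager.join("myCache", listener);   // sends REQUEST_JOIN to the coordinator
*    // ... once the coordinator has run the PREPARE_VIEW/COMMIT_VIEW phases:
*    CacheView committed = viewsManager.getCommittedView("myCache");
*    viewsManager.leave("myCache");            // broadcasts REQUEST_LEAVE to all members
* }
* }</pre>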
* <p/>
* Only the coordinator keeps the information about which nodes have requested to join, so when
* the coordinator changes, the new coordinator has to request the state from all the members using
* the RECOVER_VIEW command. This also happens after a merge, even if the new coordinator was a coordinator
* in one of the partitions. For a full description of the view recovery algorithm see {@link #recoverViews()}.
*
* @author Dan Berindei <dan@infinispan.org>
* @author Pedro Ruivo
* @since 5.1
*/
public class CacheViewsManagerImpl implements CacheViewsManager {
private static final Log log = LogFactory.getLog(CacheViewsManagerImpl.class);
public static final String DUMMY_CACHE_NAME_FOR_GLOBAL_COMMANDS = "__dummy_cache_name_for_global_commands__";
private CacheManagerNotifier cacheManagerNotifier;
private Transport transport;
private volatile boolean running = false;
private volatile List<Address> members;
private volatile Address self;
private volatile Address coordinator;
private volatile boolean isCoordinator;
private volatile boolean shouldRecoverViews;
// the complete state of every cache in the cluster
// entries can only be added, never removed
private final ConcurrentMap<String, CacheViewInfo> viewsInfo = ConcurrentMapFactory.makeConcurrentMap();
// only used if this node is the coordinator
private long timeout = 10 * 1000;
// TODO Make the cooldown configurable, or change the view installation timing altogether
private long viewChangeCooldown = 1 * 1000;
private ViewListener listener = new ViewListener();
// A single thread examines the unprepared changes and decides whether to install a new view for all the caches
private ViewTriggerThread viewTriggerThread;
private ExecutorService cacheViewInstallerExecutor;
private ExecutorService asyncTransportExecutor;
private EmbeddedCacheManager cacheManager;
public CacheViewsManagerImpl() {
}
@Inject
public void init(CacheManagerNotifier cacheManagerNotifier, Transport transport,
@ComponentName(ASYNC_TRANSPORT_EXECUTOR) ExecutorService e,
GlobalConfiguration globalConfiguration, EmbeddedCacheManager cacheManager) {
this.cacheManagerNotifier = cacheManagerNotifier;
this.transport = transport;
this.asyncTransportExecutor = e;
// TODO Try to implement a "total view installation time budget" instead of the current per-operation timeout
this.timeout = globalConfiguration.getDistributedSyncTimeout();
this.cacheManager = cacheManager;
}
// Start after JGroupsTransport so that we have a view already
@Start(priority = 11)
public void start() throws Exception {
if (transport == null)
throw new ConfigurationException("CacheViewsManager only works in clustered mode");
self = transport.getAddress();
running = true;
// TODO make the cache view installer executor configurable
ThreadFactory tfViewInstaller = new ThreadFactory() {
private final AtomicInteger count = new AtomicInteger(0);
@Override
public Thread newThread(Runnable r) {
return new Thread(r, "CacheViewInstaller-" + count.incrementAndGet() + "," + self);
}
};
cacheViewInstallerExecutor = Executors.newCachedThreadPool(tfViewInstaller);
viewTriggerThread = new ViewTriggerThread();
viewTriggerThread.start();
cacheManagerNotifier.addListener(listener);
// The listener already missed the initial view
handleNewView(transport.getMembers(), false, true);
// TODO Request an initial view of all the caches in the cluster and maintain that view
// so that a node can use the cache without ever joining and starting to hold data.
}
@Stop(priority = 0)
public void stop() {
cacheManagerNotifier.removeListener(listener);
running = false;
viewTriggerThread.wakeUp();
cacheViewInstallerExecutor.shutdown();
try {
viewTriggerThread.join(timeout);
if (viewTriggerThread.isAlive()) {
log.debugf("The cache view trigger thread did not stop in %d millis", timeout);
}
cacheViewInstallerExecutor.awaitTermination(timeout, TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
// reset interruption flag
Thread.currentThread().interrupt();
}
}
@Override
public CacheView getCommittedView(String cacheName) {
return viewsInfo.get(cacheName).getCommittedView();
}
@Override
public CacheView getPendingView(String cacheName) {
return viewsInfo.get(cacheName).getPendingView();
}
@Override
public Set<Address> getLeavers(String cacheName) {
return viewsInfo.get(cacheName).getPendingChanges().getLeavers();
}
@Override
public void join(String cacheName, CacheViewListener listener) throws Exception {
// first keep track of the join locally
CacheViewInfo cacheViewInfo = getCacheViewInfo(cacheName);
cacheViewInfo.setListener(listener);
handleRequestJoin(self, cacheName);
// then ask the coordinator to join and use its existing cache view
if (!isCoordinator) {
final CacheViewControlCommand cmd = new CacheViewControlCommand(cacheName,
CacheViewControlCommand.Type.REQUEST_JOIN, self);
// If we get a SuspectException we can ignore it, the new coordinator will come asking for our state anyway
Map<Address,Response> rspList = transport.invokeRemotely(Collections.singleton(coordinator), cmd,
ResponseMode.SYNCHRONOUS_IGNORE_LEAVERS, timeout, false, null, false, false);
checkRemoteResponse(cacheName, cmd, rspList);
}
}
@Override
public void leave(String cacheName) {
log.tracef("Stopping local cache %s", cacheName);
try {
// remove the local listener
viewsInfo.get(cacheName).setListener(null);
// update the local cache state
handleRequestLeave(self, cacheName);
// finally broadcast the leave request to all the members
final CacheViewControlCommand cmd = new CacheViewControlCommand(cacheName,
CacheViewControlCommand.Type.REQUEST_LEAVE, self);
// ignore any response from the other members
transport.invokeRemotely(members, cmd, ResponseMode.ASYNCHRONOUS, timeout, false, null, false, false);
} catch (Exception e) {
log.debugf(e, "%s: Error while leaving cache view", cacheName);
}
}
/**
* Called on the coordinator to install a new view in the cluster.
* It follows the protocol in the class description.
*/
boolean clusterInstallView(String cacheName, CacheView newView, int replicationDegree) throws Exception {
CacheViewInfo cacheViewInfo = viewsInfo.get(cacheName);
boolean success = false;
try {
log.debugf("Installing new view %s for cache %s", newView, cacheName);
clusterPrepareView(cacheName, newView, replicationDegree);
Set<Address> leavers = cacheViewInfo.getPendingChanges().getLeavers();
if (cacheViewInfo.getPendingView().containsAny(leavers)) {
log.debugf("Cannot commit cache view %s, some nodes already left the cluster: %s",
cacheViewInfo.getPendingView(), leavers);
// will still run the rollback
return false;
}
success = true;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
} catch (Throwable t) {
log.cacheViewPrepareFailure(t, newView, cacheName, cacheViewInfo.getCommittedView());
} finally {
// Cache manager is shutting down, don't try to commit or roll back
if (!isRunning())
return false;
if (success) {
clusterCommitView(cacheName, newView.getViewId(), newView.getMembers(), true);
log.debugf("Successfully installed view %s for cache %s", newView, cacheName);
} else {
CacheView previousCommittedView = cacheViewInfo.getCommittedView();
clusterRollbackView(cacheName, previousCommittedView.getViewId(), newView.getMembers(), true);
log.debugf("Rolled back to view %s for cache %s", previousCommittedView, cacheName);
}
}
return success;
}
/**
* The prepare phase of view installation.
*/
private CacheView clusterPrepareView(final String cacheName, final CacheView pendingView, final int replicationDegree) throws Exception {
final CacheViewInfo cacheViewInfo = viewsInfo.get(cacheName);
final CacheView committedView = cacheViewInfo.getCommittedView();
final List<CacheView> viewHistory = cacheViewInfo.getViewHistory();
log.tracef("%s: Preparing view %d on members %s", cacheName, pendingView.getViewId(), pendingView.getMembers());
final CacheViewControlCommand cmd = new CacheViewControlCommand(cacheName,
CacheViewControlCommand.Type.PREPARE_VIEW, self, pendingView.getViewId(),
pendingView.getMembers(), committedView.getViewId(), committedView.getMembers(),
viewHistory);
cmd.setReplicationDegree(replicationDegree);
Set<Address> leavers = cacheViewInfo.getPendingChanges().getLeavers();
if (pendingView.containsAny(leavers))
throw new IllegalStateException("Cannot prepare cache view " + pendingView + ", some nodes already left the cluster: " + leavers);
Configuration configuration = getConfiguration(cacheName);
if (configuration.transaction().transactionProtocol().isTotalOrder()) {
boolean distributed = configuration.clustering().cacheMode().isDistributed();
//in total order, the coordinator must process the prepare view command.
List<Address> pendingViewMembers = new LinkedList<Address>(pendingView.getMembers());
pendingViewMembers.add(self);
Map<Address, Response> rspList = transport.invokeRemotely(pendingViewMembers, cmd, ResponseMode.SYNCHRONOUS, timeout, false,
null, true, distributed);
checkRemoteResponse(cacheName, cmd, rspList);
} else {
// broadcast the command to the targets, which will skip the local node
Future<Map<Address, Response>> remoteFuture = asyncTransportExecutor.submit(new Callable<Map<Address, Response>>() {
@Override
public Map<Address, Response> call() throws Exception {
Map<Address, Response> rspList = transport.invokeRemotely(pendingView.getMembers(), cmd,
ResponseMode.SYNCHRONOUS, timeout, false, null, false, false);
return rspList;
}
});
// now invoke the command on the local node
Future<Object> localFuture = asyncTransportExecutor.submit(new Callable<Object>() {
@Override
public Object call() throws Exception {
handlePrepareView(cacheName, pendingView, committedView, viewHistory, replicationDegree);
return null;
}
});
// wait for the remote commands to finish
Map<Address, Response> rspList = remoteFuture.get(timeout, TimeUnit.MILLISECONDS);
checkRemoteResponse(cacheName, cmd, rspList);
// now wait for the local command
localFuture.get(timeout, TimeUnit.MILLISECONDS);
}
return pendingView;
}
/**
* The rollback phase of view installation.
*/
private void clusterRollbackView(final String cacheName, int committedViewId, List<Address> targets, boolean includeCoordinator) {
final CacheViewInfo cacheViewInfo = viewsInfo.get(cacheName);
// TODO Remove the rollback view id and instead add a pending view to the recovery response
// If the coordinator dies while sending the rollback commands, some nodes may install the new view id and some may not.
// If that happens the recovery process will try to commit the highest view id, which is wrong because we need to rollback.
final int newViewId = cacheViewInfo.getPendingChanges().getRollbackViewId();
final List<Address> validTargets = new ArrayList<Address>(targets);
validTargets.removeAll(cacheViewInfo.getPendingChanges().getLeavers());
log.tracef("%s: Rolling back to cache view %d on members %s, new view id is %d", cacheName, committedViewId, validTargets, newViewId);
try {
// it's ok to send the rollback to nodes that don't have the cache yet, they will just ignore it
// on the other hand we *have* to send the rollback to any nodes that got the prepare
final CacheViewControlCommand cmd = new CacheViewControlCommand(cacheName,
CacheViewControlCommand.Type.ROLLBACK_VIEW, self, newViewId, null, committedViewId, null, null);
// wait until we get all the responses, but ignore the results
Map<Address, Response> rspList = transport.invokeRemotely(validTargets, cmd,
ResponseMode.SYNCHRONOUS_IGNORE_LEAVERS, timeout, false, null, false, false);
checkRemoteResponse(cacheName, cmd, rspList);
} catch (Throwable t) {
log.cacheViewRollbackFailure(t, committedViewId, cacheName);
}
// in the end we roll back locally, so any pending changes can trigger a new view installation
if (includeCoordinator || validTargets.contains(self)) {
try {
handleRollbackView(cacheName, newViewId, committedViewId);
} catch (Throwable t) {
log.cacheViewRollbackFailure(t, committedViewId, cacheName);
}
}
}
/**
* The commit phase of view installation.
*/
private void clusterCommitView(final String cacheName, final int viewId, List<Address> targets, boolean includeCoordinator) {
CacheViewInfo cacheViewInfo = viewsInfo.get(cacheName);
final List<Address> validTargets = new ArrayList<Address>(targets);
// TODO Retry the commit if one of the targets left the cluster (even with this precaution)
validTargets.removeAll(cacheViewInfo.getPendingChanges().getLeavers());
log.tracef("%s: Committing cache view %d on members %s", cacheName, viewId, targets);
try {
// broadcast the command to all the members
final CacheViewControlCommand cmd = new CacheViewControlCommand(cacheName,
CacheViewControlCommand.Type.COMMIT_VIEW, self, viewId);
// wait until we get all the responses, but ignore the results
Map<Address, Response> rspList = transport.invokeRemotely(validTargets, cmd,
ResponseMode.SYNCHRONOUS_IGNORE_LEAVERS, timeout, false, null, false, false);
checkRemoteResponse(cacheName, cmd, rspList);
} catch (Throwable t) {
log.cacheViewCommitFailure(t, viewId, cacheName);
}
// in the end we commit locally, so any pending changes can trigger a new view installation
if (includeCoordinator || validTargets.contains(self)) {
try {
handleCommitView(cacheName, viewId);
} catch (Throwable t) {
log.cacheViewCommitFailure(t, viewId, cacheName);
}
}
}
/**
* Handle a join request.
* Even if this node is not the coordinator, this method will still be called for local caches.
*/
@Override
public void handleRequestJoin(Address sender, String cacheName) {
log.debugf("%s: Node %s is joining", cacheName, sender);
CacheViewInfo cacheViewInfo = getCacheViewInfo(cacheName);
// When the coordinator changes there are two possibilities:
// * either we realize we're the new coordinator first and the join request comes afterwards,
// in which case it will trigger a view installation
// * or the joiner sees us as the coordinator first and we add a join request to the pending changes list
// even though we are not the coordinator, and then recoverViews() will trigger the view installation
// If we die the joiner will get a RECOVER_VIEW command from the new coordinator
// so the join request will not be lost.
cacheViewInfo.getPendingChanges().requestJoin(sender);
viewTriggerThread.wakeUp();
}
/**
* Handle a request to move keys for the given cache.
*/
public void handleRequestMoveKeys(String cacheName) {
CacheViewInfo cacheViewInfo = getCacheViewInfo(cacheName);
cacheViewInfo.getPendingChanges().requestMoveKeys();
viewTriggerThread.wakeUp();
}
@Override
public void handleReplicationDegree(String cacheName, int replicationDegree) {
CacheViewInfo cacheViewInfo = getCacheViewInfo(cacheName);
cacheViewInfo.getPendingChanges().requestNewReplicationDegree(replicationDegree);
viewTriggerThread.wakeUp();
}
/**
* Get the {@code CacheViewInfo} for a cache, or create it with an empty view if it doesn't exist yet.
*/
private CacheViewInfo getCacheViewInfo(String cacheName) {
CacheViewInfo cacheViewInfo = viewsInfo.get(cacheName);
if (cacheViewInfo == null) {
// this is the first node to join the cache, create an empty initial view
cacheViewInfo = new CacheViewInfo(cacheName, CacheView.EMPTY_CACHE_VIEW);
CacheViewInfo oldInfo = viewsInfo.putIfAbsent(cacheName, cacheViewInfo);
// if there was an entry already, use that; otherwise use our entry
if (oldInfo != null) {
cacheViewInfo = oldInfo;
}
}
return cacheViewInfo;
}
@Override
public void handleRequestLeave(Address sender, String cacheName) {
handleLeavers(Collections.singleton(sender), cacheName);
viewTriggerThread.wakeUp();
}
private void handleLeavers(Collection<Address> leavers, String cacheName) {
CacheViewInfo cacheViewInfo = viewsInfo.get(cacheName);
if (cacheViewInfo == null)
return;
log.tracef("%s: Received leave request from nodes %s", cacheName, leavers);
// update the list of leavers - this is only relevant on the coordinator
if (isCoordinator) {
cacheViewInfo.getPendingChanges().requestLeave(leavers);
}
// Since the messages are OOB, it is possible to receive the leave message only after the new view has been
// prepared (or even committed). In that case there isn't going to be another prepare, so we shouldn't call
// the listener's preInstallView()
if (cacheViewInfo.getPendingView() != null || !cacheViewInfo.getCommittedView().containsAny(leavers))
return;
// tell the upper layer to stop sending commands to the nodes that already left
CacheViewListener cacheViewListener = cacheViewInfo.getListener();
if (cacheViewListener != null) {
cacheViewListener.preInstallView();
}
}
@Override
public void handlePrepareView(String cacheName, CacheView pendingView, CacheView committedView, List<CacheView> viewHistory, int replicationDegree) throws Exception {
boolean isLocal = pendingView.contains(self);
if (getConfiguration(cacheName).transaction().transactionProtocol().isTotalOrder() &&
!isLocal && !isCoordinator) {
log.tracef("%s: Not processing prepare view for %s. It is a total order cache and we are not a member neither" +
" the coordinator.", cacheName, pendingView);
return;
}
CacheViewInfo cacheViewInfo = viewsInfo.get(cacheName);
if (cacheViewInfo == null) {
throw new IllegalStateException(String.format("Received prepare request for cache %s, which is not running", cacheName));
}
log.tracef("%s: Preparing cache view %s, committed view is %s", cacheName, pendingView, committedView);
if (!isLocal && !isCoordinator) {
throw new IllegalStateException(String.format("%s: Received prepare cache view request, but we are not a member. View is %s",
cacheName, pendingView));
}
// The first time we get a PREPARE_VIEW our committed view id is -1, so we need to accept any view
CacheView lastCommittedView = cacheViewInfo.getCommittedView();
if (lastCommittedView.getViewId() > 0 && lastCommittedView.getViewId() != committedView.getViewId()) {
log.prepareViewIdMismatch(lastCommittedView, committedView);
}
cacheViewInfo.prepareView(pendingView);
if (isLocal) {
CacheViewListener cacheViewListener = cacheViewInfo.getListener();
if (cacheViewListener != null) {
cacheViewListener.prepareView(pendingView, lastCommittedView, viewHistory, replicationDegree);
} else {
throw new IllegalStateException(String.format("%s: Received cache view prepare request after the local node has already shut down", cacheName));
}
}
// any exception here will be propagated back to the coordinator, which will roll back the view installation
}
@Override
public void handleCommitView(String cacheName, int viewId) {
// on the coordinator: update the committed view and reset the view changes
// on a cache member: call the listener and update the committed view
// on a non-member: do nothing
CacheViewInfo cacheViewInfo = viewsInfo.get(cacheName);
if (cacheViewInfo == null) {
log.tracef("Ignoring view commit for unknown cache %s", cacheName);
return;
}
if (cacheViewInfo.hasPendingView()) {
CacheView viewToCommit = cacheViewInfo.getPendingView();
log.debugf("%s: Committing cache view %s", cacheName, viewToCommit);
CacheViewListener cacheViewListener = cacheViewInfo.getListener();
// we only prepared the view on cache members, so only notify the listener if we are a member
boolean isLocal = viewToCommit.contains(self);
if (isLocal && cacheViewListener != null) {
cacheViewListener.commitView(viewId);
}
cacheViewInfo.commitView(viewId);
cacheViewInfo.getPendingChanges().resetChanges(viewToCommit);
if (isLocal && cacheViewListener != null) {
cacheViewListener.postInstallView(viewId);
}
} else {
log.debugf("%s: We don't have a pending view, ignoring commit", cacheName);
}
}
@Override
public void handleRollbackView(String cacheName, int newViewId, int committedViewId) {
CacheViewInfo cacheViewInfo = viewsInfo.get(cacheName);
if (cacheViewInfo == null) {
log.tracef("Ignoring cache view rollback for unknown cache %s", cacheName);
return;
}
if (cacheViewInfo.hasPendingView()) {
log.debugf("%s: Rolling back to cache view %d, new view id is %d", cacheName, committedViewId, newViewId);
CacheViewListener cacheViewListener = cacheViewInfo.getListener();
if (cacheViewListener != null) {
cacheViewListener.rollbackView(newViewId, committedViewId);
}
cacheViewInfo.rollbackView(newViewId, committedViewId);
cacheViewInfo.getPendingChanges().resetChanges(cacheViewInfo.getCommittedView());
} else {
log.debugf("%s: We don't have a pending view, ignoring rollback", cacheName);
}
}
@Override
public Map<String, CacheView> handleRecoverViews() {
Map<String, CacheView> result = new HashMap<String, CacheView>(viewsInfo.size());
for (CacheViewInfo cacheViewInfo : viewsInfo.values()) {
if (cacheViewInfo.getCommittedView().contains(self)) {
result.put(cacheViewInfo.getCacheName(), cacheViewInfo.getCommittedView());
} else if (cacheViewInfo.getListener() != null) {
result.put(cacheViewInfo.getCacheName(), CacheView.EMPTY_CACHE_VIEW);
}
}
return result;
}
@Override
public void gcViewHistory(String cacheName, int minimumViewId) {
CacheViewInfo cacheViewInfo = viewsInfo.get(cacheName);
if (cacheViewInfo == null) {
return;
}
cacheViewInfo.gc(minimumViewId);
}
@Override
public int getViewHistorySize(String cacheName) {
CacheViewInfo cacheViewInfo = viewsInfo.get(cacheName);
return cacheViewInfo == null ? 0 : cacheViewInfo.getViewHistory().size();
}
private void handleNewView(List<Address> newMembers, boolean mergeView, boolean initialView) {
boolean wasCoordinator = isCoordinator;
coordinator = transport.getCoordinator();
isCoordinator = transport.isCoordinator();
if (isCoordinator && (mergeView || !wasCoordinator && !initialView)) {
shouldRecoverViews = true;
log.tracef("Node %s has become the coordinator", self);
}
// The view trigger thread might have just passed the recovery check, so we set the members last
// to ensure that it doesn't start processing leavers before doing the recovery
members = newMembers;
viewTriggerThread.wakeUp();
}
/**
* Check the results of a remote command and throw an exception if any of them is unsuccessful.
*/
private void checkRemoteResponse(String cacheName, CacheViewControlCommand cmd, Map<Address, Response> rspList) {
boolean success = true;
for (Map.Entry<Address, Response> response : rspList.entrySet()) {
Response responseValue = response.getValue();
if (responseValue == null || !responseValue.isSuccessful()) {
success = false;
log.debugf("%s: Received unsuccessful response from node %s: %s", cacheName, response.getKey(), responseValue);
}
}
if (!success) {
throw new CacheException(String.format("Error executing command %s remotely", cmd));
}
}
/**
* When the coordinator changes, the new coordinator has to find what caches are running and on which nodes from
* the other cluster members.
* <p/>
* In addition, the old coordinator (or coordinators, if there was a merge) may have left while a new view was
* being installed. We have to find the previous partitions and either commit the new view (if we know that all the
* nodes in the partition prepared the view) or roll back to the previous view (if the prepare didn't finish successfully).
* <p/>
* We cannot use just the view ids of each node, because a node could be part of two partitions (e.g. if a cluster
* {A, B} splits but only A notices the split, the merge partitions will be {A} and {A, B}). However, we can rely on
* the view id of A's committed view being greater than B's to determine that the partitions should be {A} and {B}.
* <p/>
* The algorithm is as follows:
* <ol>
* <li>The new coordinator sends <tt>RECOVER_VIEW</tt> to everyone<br/>
* <li>Each node returns a map of started caches -> last committed view.<br/>
* The returned view may be empty if the node started joining but did not finish.<br/>
* <li>Let CC be the list of all the caches returned by any member.
* <li>For each cache <tt>C</tt> in <tt>CC</tt>:<br/>
* <ol>
* <li> Create a join request for the nodes that have this cache but with an empty view.
* <li> Let <tt>CM</tt> be the list of nodes that have this cache (excluding joiners).
* <li> Sort <tt>CM</tt> by the nodes' last committed view id, in descending order.
* <li>For each node <tt>N</tt> in <tt>CM</tt>:
* <ol>
* <li> Let <tt>NN</tt> be the set of nodes in the committed view of <tt>N</tt>.
* <li> Let <tt>PP</tt> be <tt>intersection(NN, CM)</tt> (<tt>N</tt>'s subpartition).
* <li> Remove all nodes in <tt>PP</tt> from <tt>CM</tt>, so that they won't be processed again.
* <li> Let <tt>minViewId</tt> be lowest 'last committed view id' of all the nodes in <tt>PP</tt>.
* <li> If <tt>minViewId &lt; N's last committed view id</tt> then send a <tt>COMMIT_VIEW</tt> to all the nodes in <tt>PP</tt>.
* A node could not have received the commit command unless all the others prepared successfully.
* <li> Otherwise send a ROLLBACK_VIEW to go back to N's last committed view id, as we may have a pending view
* in this subpartition.
* </ol>
* <li> If we had more than one iteration it means we had a merge, so we'll need to install a merged view.
* <li> Otherwise we'll rely on the joiners/leavers to trigger a new view.
* </ol>
* </ol>
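* <p/>
* A minimal sketch of the partition grouping in step 4 (assuming, hypothetically, that
* {@code committedViews} maps each recovered member to its last committed view and that
* {@code recoveredMembers} is already sorted by view id in descending order, as in the
* implementation below):
* <pre>{@code
* List<CacheView> partitions = new ArrayList<CacheView>();
* List<Address> toProcess = new ArrayList<Address>(recoveredMembers);
* while (!toProcess.isEmpty()) {
*    CacheView committed = committedViews.get(toProcess.get(0));
*    List<Address> partition = new ArrayList<Address>(committed.getMembers());
*    partition.retainAll(toProcess);              // skip nodes that didn't send recovery info
*    toProcess.removeAll(committed.getMembers()); // never process a node twice
*    partitions.add(new CacheView(committed.getViewId(), partition));
* }
* }</pre>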
* <p/>
* TODO Relying on the commit message from the old coordinator can lead to a race condition if one of the nodes received the commit but didn't process it yet.
* We could reply to the RECOVER_VIEW message with the last prepared view in addition to the last committed view,
* and in step 4.4.5 we could send the commit if everyone in the subpartition has finished preparing the view.
*/
private void recoverViews() {
// read the recovery info from every node
final Map<Address, Map<String, CacheView>> recoveryInfo;
try {
// Workaround for ISPN-1640: Wait a short amount of time before sending the RECOVER_VIEW command
Thread.sleep(100);
log.debugf("Node %s is the new coordinator, recovering cache views", self);
recoveryInfo = new HashMap<Address, Map<String, CacheView>>();
// first get the local views
recoveryInfo.put(self, handleRecoverViews());
// then broadcast the recover command to all the members
final CacheViewControlCommand cmd = new CacheViewControlCommand(
DUMMY_CACHE_NAME_FOR_GLOBAL_COMMANDS, CacheViewControlCommand.Type.RECOVER_VIEW, self);
// use unicast instead of broadcast so that the message doesn't reach the target before the merged view is installed
List<Address> tempMembers = members;
List<Future<Map<Address, Response>>> futures = new ArrayList<Future<Map<Address, Response>>>(tempMembers.size());
for (final Address member : tempMembers) {
Future<Map<Address, Response>> future = asyncTransportExecutor.submit(new Callable<Map<Address, Response>>() {
@Override
public Map<Address, Response> call() throws Exception {
return transport.invokeRemotely(Collections.singleton(member), cmd,
ResponseMode.SYNCHRONOUS_IGNORE_LEAVERS, timeout, true, null, false, false);
}
});
futures.add(future);
}
Map<Address, Response> rspList = new HashMap<Address, Response>(tempMembers.size());
for (Future<Map<Address, Response>> future : futures) {
rspList.putAll(future.get());
}
checkRemoteResponse(null, cmd, rspList);
for (Map.Entry<Address, Response> e : rspList.entrySet()) {
SuccessfulResponse value = (SuccessfulResponse) e.getValue();
recoveryInfo.put(e.getKey(), (Map<String, CacheView>) value.getResponseValue());
}
// get the full set of caches
Set<String> cacheNames = new HashSet<String>();
for (Map<String, CacheView> m : recoveryInfo.values()) {
cacheNames.addAll(m.keySet());
}
// now apply the algorithm for each cache
for (final String cacheName : cacheNames) {
CacheViewInfo cacheViewInfo = getCacheViewInfo(cacheName);
// get the list of nodes for this cache
List<Address> recoveredMembers = new ArrayList<Address>(recoveryInfo.size());
List<Address> recoveredJoiners = new ArrayList<Address>(recoveryInfo.size());
for (Map.Entry<Address, Map<String, CacheView>> nodeRecoveryInfo : recoveryInfo.entrySet()) {
Address node = nodeRecoveryInfo.getKey();
CacheView lastCommittedView = nodeRecoveryInfo.getValue().get(cacheName);
if (lastCommittedView != null) {
// joining nodes will return an empty view
if (lastCommittedView.contains(node)) {
recoveredMembers.add(node);
} else {
recoveredJoiners.add(node);
}
}
}
// sort the collection by the viewId of the current cache
Collections.sort(recoveredMembers, new Comparator<Address>() {
@Override
public int compare(Address o1, Address o2) {
return recoveryInfo.get(o2).get(cacheName).getViewId() -
recoveryInfo.get(o1).get(cacheName).getViewId();
}
});
log.tracef("%s: Recovered members (including joiners) are %s", cacheName, recoveredMembers);
// iterate on the nodes, taking all the nodes in a view as a partition
int partitionCount = 0;
List<Address> membersToProcess = new ArrayList<Address>(recoveredMembers);
List<CacheView> partitions = new ArrayList<CacheView>(2);
while (!membersToProcess.isEmpty()) {
Address node = membersToProcess.get(0);
CacheView committedView = recoveryInfo.get(node).get(cacheName);
int highestViewId = committedView.getViewId();
if (partitionCount == 0) {
// the first partition will have the highest view id, so update our latest view id to match that
// there may have been a prepare going on in this partition, make sure our id is greater than that by adding 1
cacheViewInfo.getPendingChanges().updateLatestViewId(highestViewId + 1);
}
final List<Address> partitionMembers = new ArrayList<Address>(committedView.getMembers());
// exclude from this group nodes that didn't send recovery info
// or that were included in previous groups
partitionMembers.retainAll(membersToProcess);
membersToProcess.removeAll(committedView.getMembers());
// all the partition members could have left in the meantime, skip to the next partition
if (partitionMembers.isEmpty())
continue;
// now we have two situations:
// * either the nodes in the partition have the same view id and we need to roll back
// * or the nodes have different view ids and we need to commit
// TODO Is it possible to receive a COMMIT_VIEW from the old coordinator now, after it left the cluster?
int minViewId = Integer.MAX_VALUE;
for (Address partitionMember : partitionMembers) {
CacheView pmCommittedView = recoveryInfo.get(partitionMember).get(cacheName);
int pmViewId = pmCommittedView.getViewId();
if (pmViewId < minViewId)
minViewId = pmViewId;
}
if (minViewId != highestViewId) {
log.tracef("Found partition %d (%s) that should have committed view id %d but not all of them do (min view id %d), " +
"committing the view", partitionCount, partitionMembers, highestViewId, minViewId);
clusterCommitView(cacheName, highestViewId, partitionMembers, false);
} else {
log.tracef("Found partition %d (%s) that has committed view id %d, sending a rollback command " +
"to clear any pending prepare", partitionCount, partitionMembers, highestViewId);
clusterRollbackView(cacheName, highestViewId, partitionMembers, false);
}
partitions.add(new CacheView(highestViewId, partitionMembers));
partitionCount++;
}
log.debugf("Recovered partitions after merge for cache %s: %s", cacheName, partitions);
// we install a new view even if the member list of this cache didn't change, just to make sure
cacheViewInfo.getPendingChanges().recoveredViews(recoveredMembers, recoveredJoiners);
}
shouldRecoverViews = false;
} catch (Exception e) {
log.error("Error recovering views from the cluster members", e);
}
}
public boolean isRunning() {
return running;
}
/**
* Returns the configuration of the cache with the given name.
* @param cacheName the cache name
* @return the configuration of the cache
*/
private Configuration getConfiguration(String cacheName) {
//TODO find a better way to do it?
Configuration c = cacheManager.getCacheConfiguration(cacheName);
if (c == null) {
c = cacheManager.getDefaultCacheConfiguration();
}
return c;
}
/**
* Executed on the coordinator to trigger the installation of new views.
*/
public final class ViewTriggerThread extends Thread {
private final Lock lock = new ReentrantLock();
private final Condition condition = lock.newCondition();
public ViewTriggerThread() {
super("CacheViewTrigger," + self);
setDaemon(true);
// ViewTriggerThread could be created on a user thread, and we don't want to
// hold a reference to that classloader
setContextClassLoader(ViewTriggerThread.class.getClassLoader());
}
public void wakeUp() {
lock.lock();
try {
log.tracef("Waking up cache view installer thread");
condition.signal();
} finally {
lock.unlock();
}
}
@Override
public void run() {
outer: while (isRunning()) {
if (shouldRecoverViews) {
recoverViews();
} else {
lock.lock();
try {
// Ensure at least viewChangeCooldown between cache view changes
condition.await(viewChangeCooldown, TimeUnit.MILLISECONDS);
log.tracef("Woke up, shouldRecoverViews=%s", shouldRecoverViews);
} catch (InterruptedException e) {
// shutting down
break;
} finally {
lock.unlock();
}
}
if (isCoordinator && isRunning()) {
// add leave requests for all the leavers x all the caches
for (CacheViewInfo cacheViewInfo : viewsInfo.values()) {
// need to let the listener know about leavers first
List<Address> leavers = cacheViewInfo.computeLeavers(members);
if (!leavers.isEmpty()) {
handleLeavers(leavers, cacheViewInfo.getCacheName());
}
// check if we are shutting down
if (!isRunning())
return;
// we may have to recover the views before doing anything else
if (shouldRecoverViews) {
continue outer;
}
try {
PendingCacheViewChanges pendingChanges = cacheViewInfo.getPendingChanges();
CacheView pendingView = pendingChanges.createPendingView(cacheViewInfo.getCommittedView());
if (pendingView != null) {
cacheViewInstallerExecutor.submit(new ViewInstallationTask(cacheViewInfo.getCacheName(),
pendingView,
pendingChanges.getReplicationDegree()));
}
} catch (RuntimeException e) {
log.errorTriggeringViewInstallation(e, cacheViewInfo.getCacheName());
}
}
}
}
}
}
/**
* Executed on the coordinator to install a new view in the cluster.
*/
public class ViewInstallationTask implements Callable<Object> {
private final String cacheName;
private final CacheView newView;
private final int replicationDegree;
public ViewInstallationTask(String cacheName, CacheView newView, int replicationDegree) {
this.cacheName = cacheName;
this.newView = newView;
this.replicationDegree = replicationDegree;
}
@Override
public Object call() throws Exception {
try {
clusterInstallView(cacheName, newView, replicationDegree);
} catch (Throwable t) {
log.viewInstallationFailure(t, cacheName);
}
return null;
}
}
@Listener
public class ViewListener {
@Merged
@ViewChanged
public void handleViewChange(final ViewChangedEvent e) {
handleNewView(e.getNewMembers(), e.isMergeView(), e.getViewId() == 0);
}
}
}