package org.infinispan.query.indexmanager;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;
import org.hibernate.search.backend.BackendFactory;
import org.infinispan.factories.ComponentRegistry;
import org.infinispan.notifications.Listener;
import org.infinispan.notifications.cachemanagerlistener.annotation.ViewChanged;
import org.infinispan.notifications.cachemanagerlistener.event.ViewChangedEvent;
import org.infinispan.query.logging.Log;
import org.infinispan.remoting.rpc.RpcManager;
import org.infinispan.remoting.transport.Address;
import org.infinispan.util.logging.LogFactory;
import net.jcip.annotations.GuardedBy;
/**
* Manages the current state of being a "master" node or a node delegating index
* update operations to other nodes.
* In a static cluster this would have been a boolean state, but a state machine
* is modelled here to cope with transitions between:
*
* Initialization of a node - still not having enough information on the cluster
* Becoming a master because of previous master failure / shutdown
* Forfeiting the master role (useful for cluster merges)
*
* The transition to becoming a master goes through several phases, and at each
* state the process is reversible. For example, if operations have been put on
* hold while the node is being upgraded, but the master election then quickly
* moves to a different node (cluster startup scenario), the buffered operations
* will be forwarded to the most recent backend.
* A node that is forwarded update operations but is no longer the master will
* re-forward the payload to the new master: stability by induction.
*
* The solution is rather poor at managing cluster merge operations; for that we
* need to build on upcoming functionality from Infinispan core. For example, the
* index content wouldn't be consistent either, so one would likely need to wipe
* the index and rebuild it.
* We're also dealing with the inherent limitation of a "cluster wide lock"
* concept not being compatible with sub-groups of nodes, in which a new leader
* might be elected and a lock per group might have been created.
*
* The lock cleanup is not too aggressive: when a stale lock is detected,
* scheduled work is postponed. This implies that in situations in which a stale
* lock needs to be cleaned up, index operations might not be visible to the
* transaction committer.
* I've chosen this option as the lesser evil compared to blocking incoming RPCs;
* still, if the buffer of postponed operations fills up too quickly, we'll both
* speed up the lock acquisition and apply backpressure to the clients.
*
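* In outline, the transitions implemented below are: at first use, the node whose
* address is first in the members list elects itself as master and installs a
* lock-acquiring placeholder backend, while all other nodes route updates to it;
* on a view change, the previous master forfeits control and re-routes to the new
* master, the newly elected master starts lock acquisition, and the remaining
* nodes simply update their routing; once the index lock is available (or forcibly
* cleared after repeated failures), the placeholder is swapped for the real local
* backend.
*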
* @author Sanne Grinovero <sanne@hibernate.org> (C) 2014 Red Hat Inc.
* @since 7.0
*/
@Listener
final class ClusteredSwitchingBackend implements LazyInitializableBackend {
private static final Log log = LogFactory.getLog(ClusteredSwitchingBackend.class, Log.class);
/**
* Each attempt introduces approximately 10 seconds of delay, and waiting longer
* should never be reasonable as it would mean we're handling a cluster merge.
* Infinispan doesn't currently handle merges, so in that case the index is
* probably corrupted: there is no point in keeping the lock either.
* The only reason to wait for it is to handle very brief merges caused by
* occasional high load, or the case of users writing directly to the index.
* Sustained direct writes to the index should not be done either, at least not
* without disabling exclusive index usage, which implies the lock will become
* available sooner.
*/
private static final int MAX_LOCK_ACQUISITION_ATTEMPTS = 2;
private final Address localAddress;
private final RpcManager rpcManager;
private final LocalBackendFactory factory;
private final IndexLockController indexlock;
private final boolean async;
private final String indexName;
private final String cacheName;
/**
* Monotonically increasing view identification sequence:
* we use it to ignore stale events.
* FIXME: why are ids just an int? Is that going to be enough?
*/
private final AtomicInteger lastSeenViewId = new AtomicInteger(-1);
private volatile Address currentMaster;
private volatile IndexingBackend currentBackend;
@GuardedBy("this")
private boolean initialized = false;
@GuardedBy("this")
private int masterLockAcquisitionAttempts = 0;
ClusteredSwitchingBackend(Properties props, ComponentRegistry componentsRegistry, String indexName, LocalBackendFactory factory, IndexLockController indexlock) {
this.indexName = indexName;
this.factory = factory;
this.indexlock = indexlock;
this.rpcManager = componentsRegistry.getComponent(RpcManager.class);
this.cacheName = componentsRegistry.getCacheName();
if (rpcManager == null) {
throw new IllegalStateException("This Cache is not clustered! The switching backend should not be used for local caches");
}
this.localAddress = rpcManager.getAddress();
this.currentBackend = new LazyInitializingBackend(this);
this.async = !BackendFactory.isConfiguredAsSync(props);
}
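/**
* Invoked on cluster view changes. The compare-and-set on {@link #lastSeenViewId}
* ensures that each view id is processed at most once and that stale,
* out-of-order events are ignored.
*/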
@ViewChanged
public void viewChanged(final ViewChangedEvent e) {
final int currentViewId = lastSeenViewId.get();
final int viewId = e.getViewId();
if (viewId > currentViewId) {
if (lastSeenViewId.compareAndSet(currentViewId, viewId)) {
applyViewChangedEvent(e);
}
}
}
@Override
public void initialize() {
// We use lazyInitialize() to postpone operations to the last possible moment:
// this avoids unnecessary elections while the initial cluster is being formed.
}
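/**
* Performed lazily on first use rather than at startup, so that master election
* only happens once the initial cluster has formed. The first address in the
* members list is taken as the initial master: this node either starts acquiring
* control of the index or routes updates to the elected remote node.
*/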
@Override
public synchronized void lazyInitialize() {
if (initialized) {
return;
}
this.initialized = true;
final List<Address> members = rpcManager.getMembers();
assert members != null;
assert members.size() > 0;
assert members.get(0) != null;
final Address initialMaster = members.get(0);
lastSeenViewId.set(rpcManager.getTransport().getViewId());
if (thisIsNewMaster(initialMaster)) {
acquireControlStart();
} else {
updateRoutingToNewRemote(initialMaster);
}
}
private synchronized void applyViewChangedEvent(ViewChangedEvent e) {
List<Address> newMembers = e.getNewMembers();
if (log.isDebugEnabled()) {
log.debug("Notified of new View! Members: " + newMembers);
}
handleTopologyChange(newMembers);
}
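/**
* Applies a topology change: if the master changed, this node either forfeits the
* master role (it was the master but no longer is), elects itself as the new
* master (it is now first in the members list), or simply re-routes updates to
* the newly elected remote master.
*/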
private synchronized void handleTopologyChange(List<Address> newMembers) {
assert newMembers != null;
assert newMembers.size() > 0;
assert newMembers.get(0) != null;
final Address newmaster = newMembers.get(0);
if (masterDidChange(newmaster)) {
if (thisIsMaster()) {
if (log.isDebugEnabled()) {
log.debug("No longer a MASTER node, releasing the index lock.");
}
forfeitControl(newmaster);
} else if (thisIsNewMaster(newmaster)) {
log.debug("Electing SELF as MASTER!");
acquireControlStart();
} else {
updateRoutingToNewRemote(newmaster);
if (log.isDebugEnabled()) {
log.debug("New master elected, now routing updates to node " + newmaster);
}
}
}
}
private boolean thisIsNewMaster(Address newmaster) {
return localAddress.equals(newmaster);
}
private boolean thisIsMaster() {
return localAddress.equals(currentMaster);
}
private boolean masterDidChange(final Address newmaster) {
if (newmaster == null) {
return false;
} else {
return !newmaster.equals(currentMaster);
}
}
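/**
* Routes all subsequent index update operations to the given remote master node.
*/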
private void updateRoutingToNewRemote(final Address newMaster) {
final IndexingBackend newBackend = new RemoteIndexingBackend(cacheName, rpcManager, indexName, newMaster, async);
swapNewBackendIn(newBackend, newMaster);
}
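/**
* Starts the transition to becoming the master: a {@link LockAcquiringBackend} is
* installed as a placeholder to hold back operations while the index lock is being
* acquired (see {@link #attemptUpgrade(IndexingBackend)}).
*/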
private void acquireControlStart() {
final IndexingBackend backend = new LockAcquiringBackend(this);
this.masterLockAcquisitionAttempts = 0;
swapNewBackendIn(backend, localAddress);
}
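/**
* Gives up the master role: updates are re-routed to the new master, while closing
* the previous backend flushes pending work and is expected to release the index
* lock.
*/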
private void forfeitControl(Address newMasterAddress) {
final IndexingBackend newBackend = new RemoteIndexingBackend(cacheName, rpcManager, indexName, newMasterAddress, async);
swapNewBackendIn(newBackend, newMasterAddress);
}
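/**
* Replaces the current backend and master address (callers hold the monitor), then
* flushes and closes the previous backend, passing the new one along so that
* pending operations can be handed over to it.
*/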
private void swapNewBackendIn(IndexingBackend newBackend, Address newMasterAddress) {
final IndexingBackend oldBackend = currentBackend;
log.debugv("Swapping from backend {0} to {1}'", oldBackend, newBackend);
this.currentBackend = newBackend;
this.currentMaster = newMasterAddress;
closeBackend(oldBackend, currentBackend);
}
@Override
public void shutdown() {
closeBackend(currentBackend, null);
this.currentBackend = null;
}
@Override
public IndexingBackend getCurrentIndexingBackend() {
return currentBackend;
}
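/**
* Re-evaluates the master election against the current cluster membership,
* independently of view change notifications.
*/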
@Override
public void refresh() {
handleTopologyChange(rpcManager.getMembers());
}
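/**
* Flushes and closes the given backend, if any; the replacement (possibly null on
* shutdown) is passed along so that in-flight work can be handed over to it.
*/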
private static void closeBackend(final IndexingBackend oldOne, final IndexingBackend replacement) {
if (oldOne != null) {
oldOne.flushAndClose(replacement);
}
}
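/**
* Attempts to replace the current placeholder backend with the real local indexing
* backend; typically invoked while update operations are on hold waiting for the
* index lock. Returns true when no further attempts are needed (the backend was
* swapped, or the expected backend is no longer the current one), and false when
* the lock is still unavailable and operations should remain postponed. After
* {@link #MAX_LOCK_ACQUISITION_ATTEMPTS} failed waits the stale lock is forcibly
* cleared.
*/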
@Override
public synchronized boolean attemptUpgrade(IndexingBackend expectedBackend) {
log.trace("owning lock for attemptUpgrade(IndexingBackend)");
if (currentBackend != expectedBackend) {
//This needs to be checked while holding the lock
return true;
}
if (masterLockAcquisitionAttempts >= MAX_LOCK_ACQUISITION_ATTEMPTS) {
indexlock.forceLockClear();
swapNewBackendIn(factory.createLocalIndexingBackend(), localAddress);
return true;
} else {
masterLockAcquisitionAttempts++;
}
if (indexlock.waitForAvailability()) {
swapNewBackendIn(factory.createLocalIndexingBackend(), localAddress);
return true;
} else {
log.trace("Index lock not available: index update operations postponed.");
return false;
}
}
}