package org.infinispan.statetransfer;
import static org.infinispan.factories.KnownComponentNames.ASYNC_TRANSPORT_EXECUTOR;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.infinispan.Cache;
import org.infinispan.commands.CommandsFactory;
import org.infinispan.commands.write.WriteCommand;
import org.infinispan.configuration.cache.Configuration;
import org.infinispan.container.DataContainer;
import org.infinispan.container.InternalEntryFactory;
import org.infinispan.distexec.DistributedCallable;
import org.infinispan.distribution.ch.ConsistentHash;
import org.infinispan.distribution.ch.KeyPartitioner;
import org.infinispan.factories.annotations.ComponentName;
import org.infinispan.factories.annotations.Inject;
import org.infinispan.factories.annotations.Start;
import org.infinispan.factories.annotations.Stop;
import org.infinispan.notifications.Listener;
import org.infinispan.notifications.cachelistener.cluster.ClusterCacheNotifier;
import org.infinispan.persistence.manager.PersistenceManager;
import org.infinispan.remoting.rpc.RpcManager;
import org.infinispan.remoting.transport.Address;
import org.infinispan.topology.CacheTopology;
import org.infinispan.transaction.impl.LocalTransaction;
import org.infinispan.transaction.impl.TransactionTable;
import org.infinispan.transaction.xa.CacheTransaction;
import org.infinispan.util.logging.Log;
import org.infinispan.util.logging.LogFactory;
/**
 * {@link StateProvider} implementation. Provides transactions and cache entries to other members during state
 * transfer and keeps track of the outbound transfer tasks it has started.
 *
 * @author anistor@redhat.com
 * @since 5.2
 */
@Listener
public class StateProviderImpl implements StateProvider {
private static final Log log = LogFactory.getLog(StateProviderImpl.class);
private static final boolean trace = log.isTraceEnabled();
private String cacheName;
private Configuration configuration;
private RpcManager rpcManager;
private CommandsFactory commandsFactory;
private ClusterCacheNotifier clusterCacheNotifier;
private TransactionTable transactionTable; // optional
private DataContainer dataContainer;
private PersistenceManager persistenceManager; // optional
private ExecutorService executorService;
private StateTransferLock stateTransferLock;
private InternalEntryFactory entryFactory;
private long timeout;
private int chunkSize;
private KeyPartitioner keyPartitioner;
private StateConsumer stateConsumer;
/**
 * A map that keeps track of the current outbound state transfers, keyed by destination address. Multiple
 * transfers can flow to the same destination (for different segments), so the values are lists. All access
 * must be synchronized on the map itself.
 */
private final Map<Address, List<OutboundTransferTask>> transfersByDestination = new HashMap<>();
public StateProviderImpl() {
}
@Inject
public void init(Cache cache,
@ComponentName(ASYNC_TRANSPORT_EXECUTOR) ExecutorService executorService, //TODO Use a dedicated ExecutorService
Configuration configuration,
RpcManager rpcManager,
CommandsFactory commandsFactory,
ClusterCacheNotifier clusterCacheNotifier,
PersistenceManager persistenceManager,
DataContainer dataContainer,
TransactionTable transactionTable,
StateTransferLock stateTransferLock,
StateConsumer stateConsumer, InternalEntryFactory entryFactory,
KeyPartitioner keyPartitioner) {
this.cacheName = cache.getName();
this.executorService = executorService;
this.configuration = configuration;
this.rpcManager = rpcManager;
this.commandsFactory = commandsFactory;
this.clusterCacheNotifier = clusterCacheNotifier;
this.persistenceManager = persistenceManager;
this.dataContainer = dataContainer;
this.transactionTable = transactionTable;
this.stateTransferLock = stateTransferLock;
this.stateConsumer = stateConsumer;
this.entryFactory = entryFactory;
this.timeout = configuration.clustering().stateTransfer().timeout();
this.chunkSize = configuration.clustering().stateTransfer().chunkSize();
this.keyPartitioner = keyPartitioner;
}
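/**
 * @return {@code true} if at least one outbound transfer is in progress
 */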
@Override
public boolean isStateTransferInProgress() {
synchronized (transfersByDestination) {
return !transfersByDestination.isEmpty();
}
}
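/**
 * Reacts to a topology change. The {@code isRebalance} flag indicates whether the update is part of a
 * rebalance; it is currently unused in this method.
 */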
@Override
public void onTopologyUpdate(CacheTopology cacheTopology, boolean isRebalance) {
// Cancel outbound state transfers to destinations that are no longer members of the new topology.
// If the rebalance was cancelled, stopping every outbound transfer prevents "leaking" transfers
// from one rebalance to the next.
Set<Address> members = new HashSet<>(cacheTopology.getWriteConsistentHash().getMembers());
synchronized (transfersByDestination) {
for (Iterator<Address> it = transfersByDestination.keySet().iterator(); it.hasNext(); ) {
Address destination = it.next();
if (!members.contains(destination)) {
List<OutboundTransferTask> transfers = transfersByDestination.get(destination);
it.remove();
for (OutboundTransferTask outboundTransfer : transfers) {
outboundTransfer.cancel();
}
}
}
}
//todo [anistor] must cancel transfers for all segments that we no longer own
}
@Start(priority = 60)
@Override
public void start() {
}
@Stop(priority = 0)
@Override
public void stop() {
if (trace) {
log.tracef("Shutting down StateProvider of cache %s on node %s", cacheName, rpcManager.getAddress());
}
// cancel all outbound transfers
try {
synchronized (transfersByDestination) {
for (Iterator<List<OutboundTransferTask>> it = transfersByDestination.values().iterator(); it.hasNext(); ) {
List<OutboundTransferTask> transfers = it.next();
it.remove();
for (OutboundTransferTask outboundTransfer : transfers) {
outboundTransfer.cancel();
}
}
}
} catch (Throwable t) {
log.errorf(t, "Failed to stop StateProvider of cache %s on node %s", cacheName, rpcManager.getAddress());
}
}
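/**
 * Collects the transactions (with their locked keys and, if present, their modifications) that the requesting
 * node needs in order to become an owner of the given segments.
 */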
@Override
public List<TransactionInfo> getTransactionsForSegments(Address destination, int requestTopologyId,
      Set<Integer> segments) throws InterruptedException {
if (trace) {
log.tracef("Received request for transactions from node %s for cache %s, topology id %d, segments %s",
destination, cacheName, requestTopologyId, segments);
}
final CacheTopology cacheTopology = getCacheTopology(requestTopologyId, destination, true);
final ConsistentHash readCh = cacheTopology.getReadConsistentHash();
Set<Integer> ownedSegments = readCh.getSegmentsForOwner(rpcManager.getAddress());
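// Reject requests that include segments we do not own. On the error path 'segments' is narrowed down
// to the non-owned ones, so the exception message lists only the offending segments.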
if (!ownedSegments.containsAll(segments)) {
segments.removeAll(ownedSegments);
throw new IllegalArgumentException("Segments " + segments + " are not owned by " + rpcManager.getAddress());
}
List<TransactionInfo> transactions = new ArrayList<>();
// transactions and their locked keys are migrated only if the cache is transactional
if (configuration.transaction().transactionMode().isTransactional()) {
collectTransactionsToTransfer(destination, transactions, transactionTable.getRemoteTransactions(), segments, cacheTopology);
collectTransactionsToTransfer(destination, transactions, transactionTable.getLocalTransactions(), segments, cacheTopology);
if (trace) {
log.tracef("Found %d transaction(s) to transfer", transactions.size());
}
}
return transactions;
}
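/**
 * @return the callables that a joining node must execute in order to install the cluster listeners that are
 * already registered in the cluster
 */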
@Override
public Collection<DistributedCallable> getClusterListenersToInstall() {
return clusterCacheNotifier.retrieveClusterListenerCallablesToInstall();
}
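/**
 * Returns the local cache topology. If the requested topology id is newer than the local one, waits (up to the
 * state transfer timeout) for that topology to be installed locally first.
 */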
private CacheTopology getCacheTopology(int requestTopologyId, Address destination, boolean isReqForTransactions) throws InterruptedException {
CacheTopology cacheTopology = stateConsumer.getCacheTopology();
int currentTopologyId = cacheTopology != null ? cacheTopology.getTopologyId() : -1;
if (requestTopologyId < currentTopologyId) {
log.debugf("%s were requested by node %s with topology %d, older than the local topology (%d)",
isReqForTransactions ? "Transactions" : "Segments", destination, requestTopologyId, currentTopologyId);
} else if (requestTopologyId > currentTopologyId) {
if (trace) {
log.tracef("%s were requested by node %s with topology %d, greater than the local " +
"topology (%d). Waiting for topology %d to be installed locally.", isReqForTransactions ? "Transactions" : "Segments", destination,
requestTopologyId, currentTopologyId, requestTopologyId);
}
try {
stateTransferLock.waitForTopology(requestTopologyId, timeout, TimeUnit.MILLISECONDS);
} catch (TimeoutException e) {
throw log.failedWaitingForTopology(requestTopologyId);
}
cacheTopology = stateConsumer.getCacheTopology();
}
return cacheTopology;
}
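/**
 * Adds a {@link TransactionInfo} to {@code transactionsToTransfer} for every transaction that has locked at
 * least one key in the requested segments, skipping transactions started in the current topology or by a node
 * that has left.
 */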
private void collectTransactionsToTransfer(Address destination,
List<TransactionInfo> transactionsToTransfer,
Collection<? extends CacheTransaction> transactions,
Set<Integer> segments, CacheTopology cacheTopology) {
int topologyId = cacheTopology.getTopologyId();
List<Address> members = cacheTopology.getMembers();
// no need to filter out state transfer generated transactions because there should not be any such transactions running for any of the requested segments
for (CacheTransaction tx : transactions) {
// Skip transactions whose originators have left; the topology id check is needed for joiners.
// Also skip transactions that originated after the state transfer started.
if (tx.getTopologyId() == topologyId || !members.contains(tx.getGlobalTransaction().getAddress())) {
if (trace) log.tracef("Skipping transaction %s as it was started in the current topology or by a leaver", tx);
continue;
}
// transfer only locked keys that belong to requested segments
Set<Object> filteredLockedKeys = new HashSet<>();
Set<Object> lockedKeys = tx.getLockedKeys();
synchronized (lockedKeys) {
for (Object key : lockedKeys) {
if (segments.contains(keyPartitioner.getSegment(key))) {
filteredLockedKeys.add(key);
}
}
}
Set<Object> backupLockedKeys = tx.getBackupLockedKeys();
synchronized (backupLockedKeys) {
for (Object key : backupLockedKeys) {
if (segments.contains(keyPartitioner.getSegment(key))) {
filteredLockedKeys.add(key);
}
}
}
if (filteredLockedKeys.isEmpty()) {
if (trace) log.tracef("Skipping transaction %s because the state requestor %s doesn't own any key",
tx, destination);
continue;
}
if (trace) log.tracef("Sending transaction %s to new owner %s", tx, destination);
List<WriteCommand> txModifications = tx.getModifications();
WriteCommand[] modifications = null;
if (!txModifications.isEmpty()) {
modifications = txModifications.toArray(new WriteCommand[txModifications.size()]);
}
// If a key affected by a local transaction has a new owner, we must add the new owner to the transaction's
// affected nodes set so that it receives the commit/rollback command. See ISPN-3389.
if (tx instanceof LocalTransaction) {
LocalTransaction localTx = (LocalTransaction) tx;
localTx.locksAcquired(Collections.singleton(destination));
if (trace) log.tracef("Adding affected node %s to transferred transaction %s (keys %s)", destination,
tx.getGlobalTransaction(), filteredLockedKeys);
}
transactionsToTransfer.add(new TransactionInfo(tx.getGlobalTransaction(), tx.getTopologyId(),
modifications, filteredLockedKeys));
}
}
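/**
 * Creates an {@link OutboundTransferTask} for the requested segments, registers it and submits it to the
 * executor.
 */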
@Override
public void startOutboundTransfer(Address destination, int requestTopologyId, Set<Integer> segments)
throws InterruptedException {
if (trace) {
log.tracef("Starting outbound transfer to node %s for cache %s, topology id %d, segments %s", destination,
cacheName, requestTopologyId, segments);
}
// the destination node must already have an InboundTransferTask waiting for these segments
OutboundTransferTask outboundTransfer = new OutboundTransferTask(destination, segments, chunkSize, requestTopologyId,
keyPartitioner, this, dataContainer, persistenceManager, rpcManager, commandsFactory, entryFactory, timeout, cacheName);
addTransfer(outboundTransfer);
outboundTransfer.execute(executorService);
}
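/**
 * Registers the task in {@link #transfersByDestination} under its destination address.
 */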
private void addTransfer(OutboundTransferTask transferTask) {
if (trace) {
log.tracef("Adding outbound transfer to %s for segments %s", transferTask.getDestination(),
transferTask.getSegments());
}
synchronized (transfersByDestination) {
List<OutboundTransferTask> transfers = transfersByDestination
.computeIfAbsent(transferTask.getDestination(), k -> new ArrayList<>());
transfers.add(transferTask);
}
}
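/**
 * Cancels the given segments of any registered outbound transfer to {@code destination} that was started for
 * {@code topologyId}.
 */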
@Override
public void cancelOutboundTransfer(Address destination, int topologyId, Set<Integer> segments) {
if (trace) {
log.tracef("Cancelling outbound transfer to node %s for cache %s, topology id %d, segments %s", destination,
cacheName, topologyId, segments);
}
// get the outbound transfers for this address and given segments and cancel the transfers
synchronized (transfersByDestination) {
List<OutboundTransferTask> transferTasks = transfersByDestination.get(destination);
if (transferTasks != null) {
// iterate over an array copy of the list to avoid a ConcurrentModificationException: cancelling all of a
// task's segments removes the task from the list via removeTransfer(transferTask)
OutboundTransferTask[] taskListCopy = transferTasks.toArray(new OutboundTransferTask[transferTasks.size()]);
for (OutboundTransferTask transferTask : taskListCopy) {
if (transferTask.getTopologyId() == topologyId) {
transferTask.cancelSegments(segments); //this can potentially result in a call to removeTransfer(transferTask)
}
}
}
}
}
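/**
 * Removes a completed or cancelled task from {@link #transfersByDestination}, dropping the destination's
 * entry once its task list becomes empty.
 */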
private void removeTransfer(OutboundTransferTask transferTask) {
synchronized (transfersByDestination) {
List<OutboundTransferTask> transferTasks = transfersByDestination.get(transferTask.getDestination());
if (transferTasks != null) {
transferTasks.remove(transferTask);
if (transferTasks.isEmpty()) {
transfersByDestination.remove(transferTask.getDestination());
}
}
}
}
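/**
 * Callback invoked by an {@link OutboundTransferTask} when it completes or is cancelled.
 */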
void onTaskCompletion(OutboundTransferTask transferTask) {
if (trace) {
log.tracef("Removing %s outbound transfer of segments to %s for cache %s, segments %s",
transferTask.isCancelled() ? "cancelled" : "completed", transferTask.getDestination(),
cacheName, transferTask.getSegments());
}
removeTransfer(transferTask);
}
}