/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite.internal.processors.cache.distributed.dht.preloader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReadWriteLock;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.internal.IgniteNeedReconnectException;
import org.apache.ignite.IgniteLogger;
import org.apache.ignite.IgniteSystemProperties;
import org.apache.ignite.cache.PartitionLossPolicy;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.events.CacheEvent;
import org.apache.ignite.events.DiscoveryEvent;
import org.apache.ignite.events.Event;
import org.apache.ignite.events.EventType;
import org.apache.ignite.internal.IgniteClientDisconnectedCheckedException;
import org.apache.ignite.internal.IgniteFutureTimeoutCheckedException;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.IgniteInterruptedCheckedException;
import org.apache.ignite.internal.cluster.ClusterTopologyCheckedException;
import org.apache.ignite.internal.events.DiscoveryCustomEvent;
import org.apache.ignite.internal.managers.discovery.DiscoveryCustomMessage;
import org.apache.ignite.internal.managers.discovery.DiscoCache;
import org.apache.ignite.internal.pagemem.snapshot.StartFullSnapshotAckDiscoveryMessage;
import org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion;
import org.apache.ignite.internal.processors.affinity.GridAffinityAssignmentCache;
import org.apache.ignite.internal.processors.cache.CacheAffinityChangeMessage;
import org.apache.ignite.internal.processors.cache.CacheInvalidStateException;
import org.apache.ignite.internal.processors.cache.ClusterState;
import org.apache.ignite.internal.processors.cache.DynamicCacheChangeBatch;
import org.apache.ignite.internal.processors.cache.CachePartitionExchangeWorkerTask;
import org.apache.ignite.internal.processors.cache.DynamicCacheChangeRequest;
import org.apache.ignite.internal.processors.cache.DynamicCacheDescriptor;
import org.apache.ignite.internal.processors.cache.GridCacheContext;
import org.apache.ignite.internal.processors.cache.GridCacheMvccCandidate;
import org.apache.ignite.internal.processors.cache.GridCacheSharedContext;
import org.apache.ignite.internal.processors.cache.distributed.dht.GridClientPartitionTopology;
import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition;
import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionState;
import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopology;
import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtTopologyFuture;
import org.apache.ignite.internal.processors.cache.transactions.IgniteTxKey;
import org.apache.ignite.internal.processors.cache.version.GridCacheVersion;
import org.apache.ignite.internal.processors.cluster.GridClusterStateProcessor;
import org.apache.ignite.internal.processors.timeout.GridTimeoutObjectAdapter;
import org.apache.ignite.internal.util.future.GridFutureAdapter;
import org.apache.ignite.internal.util.tostring.GridToStringExclude;
import org.apache.ignite.internal.util.tostring.GridToStringInclude;
import org.apache.ignite.internal.util.typedef.CI1;
import org.apache.ignite.internal.util.typedef.F;
import org.apache.ignite.internal.util.typedef.T2;
import org.apache.ignite.internal.util.typedef.X;
import org.apache.ignite.internal.util.typedef.internal.CU;
import org.apache.ignite.internal.util.typedef.internal.LT;
import org.apache.ignite.internal.util.typedef.internal.S;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.apache.ignite.lang.IgniteInClosure;
import org.apache.ignite.lang.IgniteRunnable;
import org.jetbrains.annotations.Nullable;
import org.jsr166.ConcurrentHashMap8;
import static org.apache.ignite.IgniteSystemProperties.IGNITE_THREAD_DUMP_ON_EXCHANGE_TIMEOUT;
import static org.apache.ignite.cache.PartitionLossPolicy.READ_ONLY_ALL;
import static org.apache.ignite.cache.PartitionLossPolicy.READ_ONLY_SAFE;
import static org.apache.ignite.cache.PartitionLossPolicy.READ_WRITE_ALL;
import static org.apache.ignite.cache.PartitionLossPolicy.READ_WRITE_SAFE;
import static org.apache.ignite.events.EventType.EVT_NODE_FAILED;
import static org.apache.ignite.events.EventType.EVT_NODE_JOINED;
import static org.apache.ignite.events.EventType.EVT_NODE_LEFT;
import static org.apache.ignite.internal.events.DiscoveryCustomEvent.EVT_DISCOVERY_CUSTOM_EVT;
import static org.apache.ignite.internal.managers.communication.GridIoPolicy.SYSTEM_POOL;
/**
* Future for exchanging partition maps.
*/
@SuppressWarnings({"TypeMayBeWeakened", "unchecked"})
public class GridDhtPartitionsExchangeFuture extends GridFutureAdapter<AffinityTopologyVersion>
implements Comparable<GridDhtPartitionsExchangeFuture>, GridDhtTopologyFuture, CachePartitionExchangeWorkerTask {
/** */
public static final int DUMP_PENDING_OBJECTS_THRESHOLD =
IgniteSystemProperties.getInteger(IgniteSystemProperties.IGNITE_DUMP_PENDING_OBJECTS_THRESHOLD, 10);
/** */
private static final long serialVersionUID = 0L;
/** Dummy flag. */
private final boolean dummy;
/** Force preload flag. */
private final boolean forcePreload;
/** Dummy reassign flag. */
private final boolean reassign;
/** */
@GridToStringExclude
private volatile DiscoCache discoCache;
/** Discovery event. */
private volatile DiscoveryEvent discoEvt;
/** */
@GridToStringExclude
private final Set<UUID> remaining = new HashSet<>();
/** */
@GridToStringExclude
private int pendingSingleUpdates;
/** */
@GridToStringExclude
private List<ClusterNode> srvNodes;
/** */
private ClusterNode crd;
/** Exchange future ID. */
private final GridDhtPartitionExchangeId exchId;
/** Cache context. */
private final GridCacheSharedContext<?, ?> cctx;
/** Busy lock to prevent activities from accessing exchanger while it's stopping. */
private ReadWriteLock busyLock;
/** */
private AtomicBoolean added = new AtomicBoolean(false);
/** Event latch. */
@GridToStringExclude
private final CountDownLatch evtLatch = new CountDownLatch(1);
/** */
private GridFutureAdapter<Boolean> initFut;
/** */
@GridToStringExclude
private final List<IgniteRunnable> discoEvts = new ArrayList<>();
/** */
private boolean init;
/** Last committed cache version before next topology version use. */
private AtomicReference<GridCacheVersion> lastVer = new AtomicReference<>();
/**
* Messages received on a non-coordinator node are stored in case this node
* becomes the coordinator.
*/
private final Map<ClusterNode, GridDhtPartitionsSingleMessage> singleMsgs = new ConcurrentHashMap8<>();
/** Messages received from new coordinator. */
private final Map<ClusterNode, GridDhtPartitionsFullMessage> fullMsgs = new ConcurrentHashMap8<>();
/** */
@SuppressWarnings({"FieldCanBeLocal", "UnusedDeclaration"})
@GridToStringInclude
private volatile IgniteInternalFuture<?> partReleaseFut;
/** Logger. */
private final IgniteLogger log;
/** Dynamic cache change requests. */
private Collection<DynamicCacheChangeRequest> reqs;
/** */
private CacheAffinityChangeMessage affChangeMsg;
/** Cache validation results. */
private volatile Map<Integer, CacheValidation> cacheValidRes;
/** Skip preload flag. */
private boolean skipPreload;
/** */
private boolean clientOnlyExchange;
/** Init timestamp. Used to track the amount of time spent to complete the future. */
private long initTs;
/** */
private boolean centralizedAff;
/** Change global state exception. */
private Exception changeGlobalStateE;
/** Change global state exceptions. */
private final Map<UUID, Exception> changeGlobalStateExceptions = new ConcurrentHashMap8<>();
/** Flag indicating that this exchange is triggered by a global state change. */
private boolean exchangeOnChangeGlobalState;
/** */
private final ConcurrentMap<UUID, GridDhtPartitionsAbstractMessage> msgs = new ConcurrentHashMap8<>();
/** Forced Rebalance future. */
private GridFutureAdapter<Boolean> forcedRebFut;
/**
* Dummy future created to trigger reassignments if partition
* topology changed while preloading.
*
* @param cctx Cache context.
* @param reassign Dummy reassign flag.
* @param discoEvt Discovery event.
* @param exchId Exchange id.
*/
public GridDhtPartitionsExchangeFuture(
GridCacheSharedContext cctx,
boolean reassign,
DiscoveryEvent discoEvt,
GridDhtPartitionExchangeId exchId
) {
dummy = true;
forcePreload = false;
this.exchId = exchId;
this.reassign = reassign;
this.discoEvt = discoEvt;
this.cctx = cctx;
log = cctx.logger(getClass());
onDone(exchId.topologyVersion());
}
/**
* Force preload future created to trigger reassignments if partition
* topology changed while preloading.
*
* @param cctx Cache context.
* @param discoEvt Discovery event.
* @param exchId Exchange id.
* @param forcedRebFut Forced Rebalance future.
*/
public GridDhtPartitionsExchangeFuture(GridCacheSharedContext cctx, DiscoveryEvent discoEvt,
GridDhtPartitionExchangeId exchId, GridFutureAdapter<Boolean> forcedRebFut) {
dummy = false;
forcePreload = true;
this.exchId = exchId;
this.discoEvt = discoEvt;
this.cctx = cctx;
this.forcedRebFut = forcedRebFut;
log = cctx.logger(getClass());
reassign = true;
onDone(exchId.topologyVersion());
}
/**
* @param cctx Cache context.
* @param busyLock Busy lock.
* @param exchId Exchange ID.
* @param reqs Cache change requests.
* @param affChangeMsg Affinity change message.
*/
public GridDhtPartitionsExchangeFuture(
GridCacheSharedContext cctx,
ReadWriteLock busyLock,
GridDhtPartitionExchangeId exchId,
Collection<DynamicCacheChangeRequest> reqs,
CacheAffinityChangeMessage affChangeMsg
) {
assert busyLock != null;
assert exchId != null;
assert exchId.topologyVersion() != null;
dummy = false;
forcePreload = false;
reassign = false;
this.cctx = cctx;
this.busyLock = busyLock;
this.exchId = exchId;
this.reqs = reqs;
this.affChangeMsg = affChangeMsg;
log = cctx.logger(getClass());
initFut = new GridFutureAdapter<>();
if (log.isDebugEnabled())
log.debug("Creating exchange future [localNode=" + cctx.localNodeId() + ", fut=" + this + ']');
}
/**
* @param reqs Cache change requests.
*/
public void cacheChangeRequests(Collection<DynamicCacheChangeRequest> reqs) {
this.reqs = reqs;
}
/**
* @param affChangeMsg Affinity change message.
*/
public void affinityChangeMessage(CacheAffinityChangeMessage affChangeMsg) {
this.affChangeMsg = affChangeMsg;
}
/** {@inheritDoc} */
@Override public AffinityTopologyVersion topologyVersion() {
return exchId.topologyVersion();
}
/**
* @return Skip preload flag.
*/
public boolean skipPreload() {
return skipPreload;
}
/**
* @return Dummy flag.
*/
public boolean dummy() {
return dummy;
}
/**
* @return Force preload flag.
*/
public boolean forcePreload() {
return forcePreload;
}
/**
* @return Dummy reassign flag.
*/
public boolean reassign() {
return reassign;
}
/**
* @return {@code True} if dummy reassign.
*/
public boolean dummyReassign() {
return (dummy() || forcePreload()) && reassign();
}
/**
* @return Discovery cache.
*/
public DiscoCache discoCache() {
return discoCache;
}
/**
* @param cacheId Cache ID to check.
* @param topVer Topology version.
* @return {@code True} if cache was added during this exchange.
*/
public boolean isCacheAdded(int cacheId, AffinityTopologyVersion topVer) {
if (cacheStarted(cacheId))
return true;
GridCacheContext<?, ?> cacheCtx = cctx.cacheContext(cacheId);
return cacheCtx != null && F.eq(cacheCtx.startTopologyVersion(), topVer);
}
/**
* @param cacheId Cache ID.
* @return {@code True} if non-client cache was added during this exchange.
*/
public boolean cacheStarted(int cacheId) {
if (!F.isEmpty(reqs)) {
for (DynamicCacheChangeRequest req : reqs) {
if (req.start() && !req.clientStartOnly()) {
if (CU.cacheId(req.cacheName()) == cacheId)
return true;
}
}
}
return false;
}
/**
* @return {@code True} if the future was not added before (the added flag has been set by this call).
*/
public boolean onAdded() {
return added.compareAndSet(false, true);
}
/**
* Event callback.
*
* @param exchId Exchange ID.
* @param discoEvt Discovery event.
* @param discoCache Discovery data cache.
*/
public void onEvent(GridDhtPartitionExchangeId exchId, DiscoveryEvent discoEvt, DiscoCache discoCache) {
assert exchId.equals(this.exchId);
this.discoEvt = discoEvt;
this.discoCache = discoCache;
evtLatch.countDown();
}
/**
* @return New cluster state if this exchange changes the global state, or {@code null} otherwise.
*/
public ClusterState newClusterState() {
if (!F.isEmpty(reqs)) {
for (DynamicCacheChangeRequest req : reqs) {
if (req.globalStateChange())
return req.state();
}
}
return null;
}
/**
* @return Discovery event.
*/
public DiscoveryEvent discoveryEvent() {
return discoEvt;
}
/**
* @return Exchange ID.
*/
public GridDhtPartitionExchangeId exchangeId() {
return exchId;
}
/**
* @return Forced Rebalance future.
*/
@Nullable public GridFutureAdapter<Boolean> forcedRebalanceFuture() {
return forcedRebFut;
}
/**
* @return {@code True} if entered busy state.
*/
private boolean enterBusy() {
if (busyLock.readLock().tryLock())
return true;
if (log.isDebugEnabled())
log.debug("Failed to enter busy state (exchanger is stopping): " + this);
return false;
}
/**
*
*/
private void leaveBusy() {
busyLock.readLock().unlock();
}
/**
* Starts activity.
*
* @throws IgniteInterruptedCheckedException If interrupted.
*/
public void init() throws IgniteInterruptedCheckedException {
if (isDone())
return;
initTs = U.currentTimeMillis();
U.await(evtLatch);
assert discoEvt != null : this;
assert exchId.nodeId().equals(discoEvt.eventNode().id()) : this;
assert !dummy && !forcePreload : this;
try {
discoCache.updateAlives(cctx.discovery());
AffinityTopologyVersion topVer = topologyVersion();
srvNodes = new ArrayList<>(discoCache.serverNodes());
remaining.addAll(F.nodeIds(F.view(srvNodes, F.remoteNodes(cctx.localNodeId()))));
crd = srvNodes.isEmpty() ? null : srvNodes.get(0);
boolean crdNode = crd != null && crd.isLocal();
skipPreload = cctx.kernalContext().clientNode();
ExchangeType exchange;
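// Choose the exchange type based on the triggering event: custom discovery messages
// (cache change batch, snapshot start acknowledgement, affinity change) are handled
// separately from regular node join/leave events.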
if (discoEvt.type() == EVT_DISCOVERY_CUSTOM_EVT) {
DiscoveryCustomMessage msg = ((DiscoveryCustomEvent)discoEvt).customMessage();
if (msg instanceof DynamicCacheChangeBatch) {
assert !F.isEmpty(reqs);
exchange = onCacheChangeRequest(crdNode);
}
else if (msg instanceof StartFullSnapshotAckDiscoveryMessage)
exchange = CU.clientNode(discoEvt.eventNode()) ?
onClientNodeEvent(crdNode) :
onServerNodeEvent(crdNode);
else {
assert affChangeMsg != null : this;
exchange = onAffinityChangeRequest(crdNode);
}
}
else {
if (discoEvt.type() == EVT_NODE_JOINED) {
Collection<DynamicCacheDescriptor> receivedCaches = cctx.cache().startReceivedCaches(topVer);
if (!discoEvt.eventNode().isLocal())
cctx.affinity().initStartedCaches(crdNode, this, receivedCaches);
}
exchange = CU.clientNode(discoEvt.eventNode()) ?
onClientNodeEvent(crdNode) :
onServerNodeEvent(crdNode);
}
updateTopologies(crdNode);
if (!F.isEmpty(reqs)) {
boolean hasStop = false;
for (DynamicCacheChangeRequest req : reqs) {
if (req.stop()) {
hasStop = true;
break;
}
}
if (hasStop)
cctx.cache().context().database().beforeCachesStop();
}
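// ALL: full distributed exchange between server nodes; CLIENT: only local topologies
// are initialized and the local partition map is sent to the coordinator;
// NONE: nothing needs to be exchanged for this event.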
switch (exchange) {
case ALL: {
distributedExchange();
break;
}
case CLIENT: {
initTopologies();
clientOnlyExchange();
break;
}
case NONE: {
initTopologies();
onDone(topVer);
break;
}
default:
assert false;
}
}
catch (IgniteInterruptedCheckedException e) {
onDone(e);
throw e;
}
catch (IgniteNeedReconnectException e) {
onDone(e);
}
catch (Throwable e) {
if (reconnectOnError(e))
onDone(new IgniteNeedReconnectException(cctx.localNode(), e));
else {
U.error(log, "Failed to reinitialize local partitions (preloading will be stopped): " + exchId, e);
onDone(e);
}
if (e instanceof Error)
throw (Error)e;
}
}
/**
* @throws IgniteCheckedException If failed.
*/
private void initTopologies() throws IgniteCheckedException {
cctx.database().checkpointReadLock();
try {
if (crd != null) {
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
if (cacheCtx.isLocal())
continue;
cacheCtx.topology().beforeExchange(this, !centralizedAff);
}
}
}
finally {
cctx.database().checkpointReadUnlock();
}
}
/**
* @param crd Coordinator flag.
* @throws IgniteCheckedException If failed.
*/
private void updateTopologies(boolean crd) throws IgniteCheckedException {
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
if (cacheCtx.isLocal())
continue;
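// A client topology (if any) accumulated partition updates while the cache was not
// started locally; drop it here and, on the coordinator, merge its map into the
// started cache topology below.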
GridClientPartitionTopology clientTop = cctx.exchange().clearClientTopology(cacheCtx.cacheId());
long updSeq = clientTop == null ? -1 : clientTop.lastUpdateSequence();
GridDhtPartitionTopology top = cacheCtx.topology();
if (crd) {
boolean updateTop = !cacheCtx.isLocal() &&
exchId.topologyVersion().equals(cacheCtx.startTopologyVersion());
if (updateTop && clientTop != null)
top.update(exchId, clientTop.partitionMap(true), clientTop.updateCounters(false));
}
top.updateTopologyVersion(exchId, this, updSeq, stopping(cacheCtx.cacheId()));
}
for (GridClientPartitionTopology top : cctx.exchange().clientTopologies())
top.updateTopologyVersion(exchId, this, -1, stopping(top.cacheId()));
}
/**
* @param crd Coordinator flag.
* @return Exchange type.
* @throws IgniteCheckedException If failed.
*/
private ExchangeType onCacheChangeRequest(boolean crd) throws IgniteCheckedException {
assert !F.isEmpty(reqs) : this;
GridClusterStateProcessor stateProc = cctx.kernalContext().state();
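// If the request batch changes the global cluster state, remember a possible local
// failure so it can be propagated to the coordinator with the single partitions message.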
if (exchangeOnChangeGlobalState = stateProc.changeGlobalState(reqs, topologyVersion())) {
changeGlobalStateE = stateProc.onChangeGlobalState();
if (crd && changeGlobalStateE != null)
changeGlobalStateExceptions.put(cctx.localNodeId(), changeGlobalStateE);
}
boolean clientOnly = cctx.affinity().onCacheChangeRequest(this, crd, reqs);
if (clientOnly) {
boolean clientCacheStarted = false;
for (DynamicCacheChangeRequest req : reqs) {
if (req.start() && req.clientStartOnly() && req.initiatingNodeId().equals(cctx.localNodeId())) {
clientCacheStarted = true;
break;
}
}
return clientCacheStarted ? ExchangeType.CLIENT : ExchangeType.NONE;
}
else
return cctx.kernalContext().clientNode() ? ExchangeType.CLIENT : ExchangeType.ALL;
}
/**
* @param crd Coordinator flag.
* @throws IgniteCheckedException If failed.
* @return Exchange type.
*/
private ExchangeType onAffinityChangeRequest(boolean crd) throws IgniteCheckedException {
assert affChangeMsg != null : this;
cctx.affinity().onChangeAffinityMessage(this, crd, affChangeMsg);
if (cctx.kernalContext().clientNode())
return ExchangeType.CLIENT;
return ExchangeType.ALL;
}
/**
* @param crd Coordinator flag.
* @throws IgniteCheckedException If failed.
* @return Exchange type.
*/
private ExchangeType onClientNodeEvent(boolean crd) throws IgniteCheckedException {
assert CU.clientNode(discoEvt.eventNode()) : this;
if (discoEvt.type() == EVT_NODE_LEFT || discoEvt.type() == EVT_NODE_FAILED) {
onLeft();
assert !discoEvt.eventNode().isLocal() : discoEvt;
}
else
assert discoEvt.type() == EVT_NODE_JOINED || discoEvt.type() == EVT_DISCOVERY_CUSTOM_EVT : discoEvt;
cctx.affinity().onClientEvent(this, crd);
return discoEvt.eventNode().isLocal() ? ExchangeType.CLIENT : ExchangeType.NONE;
}
/**
* @param crd Coordinator flag.
* @throws IgniteCheckedException If failed.
* @return Exchange type.
*/
private ExchangeType onServerNodeEvent(boolean crd) throws IgniteCheckedException {
assert !CU.clientNode(discoEvt.eventNode()) : this;
if (discoEvt.type() == EVT_NODE_LEFT || discoEvt.type() == EVT_NODE_FAILED) {
onLeft();
warnNoAffinityNodes();
centralizedAff = cctx.affinity().onServerLeft(this);
}
else
cctx.affinity().onServerJoin(this, crd);
return cctx.kernalContext().clientNode() ? ExchangeType.CLIENT : ExchangeType.ALL;
}
/**
* @throws IgniteCheckedException If failed.
*/
private void clientOnlyExchange() throws IgniteCheckedException {
clientOnlyExchange = true;
// TODO: check invoke on client.
if (crd != null) {
if (crd.isLocal()) {
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
boolean updateTop = !cacheCtx.isLocal() &&
exchId.topologyVersion().equals(cacheCtx.startTopologyVersion());
if (updateTop) {
for (GridClientPartitionTopology top : cctx.exchange().clientTopologies()) {
if (top.cacheId() == cacheCtx.cacheId()) {
cacheCtx.topology().update(exchId,
top.partitionMap(true),
top.updateCounters(false));
break;
}
}
}
}
}
else {
if (!centralizedAff)
sendLocalPartitions(crd);
initDone();
return;
}
}
else {
if (centralizedAff) { // Last server node failed.
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
GridAffinityAssignmentCache aff = cacheCtx.affinity().affinityCache();
aff.initialize(topologyVersion(), aff.idealAssignment());
}
}
}
onDone(topologyVersion());
}
/**
* @throws IgniteCheckedException If failed.
*/
private void distributedExchange() throws IgniteCheckedException {
assert crd != null;
assert !cctx.kernalContext().clientNode();
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
if (cacheCtx.isLocal())
continue;
cacheCtx.preloader().onTopologyChanged(this);
}
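// Wait until operations (transactions, atomic updates, explicit locks) started on earlier
// topology versions are released before partition assignments are changed.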
waitPartitionRelease();
boolean topChanged = discoEvt.type() != EVT_DISCOVERY_CUSTOM_EVT || affChangeMsg != null;
//todo check
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
if (cacheCtx.isLocal() || stopping(cacheCtx.cacheId()))
continue;
if (topChanged) {
cacheCtx.continuousQueries().beforeExchange(exchId.topologyVersion());
// Partition release future is done so we can flush the write-behind store.
cacheCtx.store().forceFlush();
}
cacheCtx.topology().beforeExchange(this, !centralizedAff);
}
cctx.database().beforeExchange(this);
// If this exchange was triggered by a snapshot (backup) request, synchronously wait for the local snapshot creation to start.
if (discoEvt.type() == EVT_DISCOVERY_CUSTOM_EVT) {
DiscoveryCustomMessage customMessage = ((DiscoveryCustomEvent)discoEvt).customMessage();
if (customMessage instanceof StartFullSnapshotAckDiscoveryMessage) {
StartFullSnapshotAckDiscoveryMessage backupMsg = (StartFullSnapshotAckDiscoveryMessage)customMessage;
if (!cctx.localNode().isClient() && !cctx.localNode().isDaemon()) {
ClusterNode node = cctx.discovery().node(backupMsg.initiatorNodeId());
assert node != null;
IgniteInternalFuture fut = cctx.database().startLocalSnapshotCreation(backupMsg, node, backupMsg.message());
if (fut != null)
fut.get();
}
}
}
if (crd.isLocal()) {
if (remaining.isEmpty())
onAllReceived();
}
else
sendPartitions(crd);
initDone();
}
/**
* @throws IgniteCheckedException If failed.
*/
private void waitPartitionRelease() throws IgniteCheckedException {
IgniteInternalFuture<?> partReleaseFut = cctx.partitionReleaseFuture(topologyVersion());
// Assign to a class field so it is included in the toString() output.
this.partReleaseFut = partReleaseFut;
if (exchId.isLeft())
cctx.mvcc().removeExplicitNodeLocks(exchId.nodeId(), exchId.topologyVersion());
if (log.isDebugEnabled())
log.debug("Before waiting for partition release future: " + this);
int dumpedObjects = 0;
while (true) {
try {
partReleaseFut.get(2 * cctx.gridConfig().getNetworkTimeout(), TimeUnit.MILLISECONDS);
break;
}
catch (IgniteFutureTimeoutCheckedException ignored) {
// Print pending transactions and locks that might have led to hang.
if (dumpedObjects < DUMP_PENDING_OBJECTS_THRESHOLD) {
dumpPendingObjects();
dumpedObjects++;
}
}
}
if (log.isDebugEnabled())
log.debug("After waiting for partition release future: " + this);
IgniteInternalFuture<?> locksFut = cctx.mvcc().finishLocks(exchId.topologyVersion());
dumpedObjects = 0;
while (true) {
try {
locksFut.get(2 * cctx.gridConfig().getNetworkTimeout(), TimeUnit.MILLISECONDS);
break;
}
catch (IgniteFutureTimeoutCheckedException ignored) {
if (dumpedObjects < DUMP_PENDING_OBJECTS_THRESHOLD) {
U.warn(log, "Failed to wait for locks release future. " +
"Dumping pending objects that might be the cause: " + cctx.localNodeId());
U.warn(log, "Locked keys:");
for (IgniteTxKey key : cctx.mvcc().lockedKeys())
U.warn(log, "Locked key: " + key);
for (IgniteTxKey key : cctx.mvcc().nearLockedKeys())
U.warn(log, "Locked near key: " + key);
Map<IgniteTxKey, Collection<GridCacheMvccCandidate>> locks =
cctx.mvcc().unfinishedLocks(exchId.topologyVersion());
for (Map.Entry<IgniteTxKey, Collection<GridCacheMvccCandidate>> e : locks.entrySet())
U.warn(log, "Awaited locked entry [key=" + e.getKey() + ", mvcc=" + e.getValue() + ']');
dumpedObjects++;
if (IgniteSystemProperties.getBoolean(IGNITE_THREAD_DUMP_ON_EXCHANGE_TIMEOUT, false))
U.dumpThreads(log);
}
}
}
}
/**
*
*/
private void onLeft() {
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
if (cacheCtx.isLocal())
continue;
cacheCtx.preloader().unwindUndeploys();
}
cctx.mvcc().removeExplicitNodeLocks(exchId.nodeId(), exchId.topologyVersion());
}
/**
*
*/
private void warnNoAffinityNodes() {
List<String> cachesWithoutNodes = null;
for (String name : cctx.cache().cacheNames()) {
if (discoCache.cacheAffinityNodes(name).isEmpty()) {
if (cachesWithoutNodes == null)
cachesWithoutNodes = new ArrayList<>();
cachesWithoutNodes.add(name);
// Fire event even if there is no client cache started.
if (cctx.gridEvents().isRecordable(EventType.EVT_CACHE_NODES_LEFT)) {
Event evt = new CacheEvent(
name,
cctx.localNode(),
cctx.localNode(),
"All server nodes have left the cluster.",
EventType.EVT_CACHE_NODES_LEFT,
0,
false,
null,
null,
null,
null,
false,
null,
false,
null,
null,
null
);
cctx.gridEvents().record(evt);
}
}
}
if (cachesWithoutNodes != null) {
StringBuilder sb =
new StringBuilder("All server nodes for the following caches have left the cluster: ");
for (int i = 0; i < cachesWithoutNodes.size(); i++) {
String cache = cachesWithoutNodes.get(i);
sb.append('\'').append(cache).append('\'');
if (i != cachesWithoutNodes.size() - 1)
sb.append(", ");
}
U.quietAndWarn(log, sb.toString());
U.quietAndWarn(log, "Must have server nodes for caches to operate.");
}
}
/**
*
*/
private void dumpPendingObjects() {
U.warn(log, "Failed to wait for partition release future [topVer=" + topologyVersion() +
", node=" + cctx.localNodeId() + "]. Dumping pending objects that might be the cause: ");
try {
cctx.exchange().dumpDebugInfo(topologyVersion());
}
catch (Exception e) {
U.error(log, "Failed to dump debug information: " + e, e);
}
if (IgniteSystemProperties.getBoolean(IGNITE_THREAD_DUMP_ON_EXCHANGE_TIMEOUT, false))
U.dumpThreads(log);
}
/**
* @param cacheId Cache ID to check.
* @return {@code True} if cache is stopping by this exchange.
*/
public boolean stopping(int cacheId) {
boolean stopping = false;
if (!F.isEmpty(reqs)) {
for (DynamicCacheChangeRequest req : reqs) {
if (cacheId == CU.cacheId(req.cacheName())) {
stopping = req.stop();
break;
}
}
}
return stopping;
}
/**
* @param node Node.
* @throws IgniteCheckedException If failed.
*/
private void sendLocalPartitions(ClusterNode node) throws IgniteCheckedException {
assert node != null;
// Reset lost partitions before sending the local partition map to the coordinator.
if (!F.isEmpty(reqs)) {
Set<String> caches = new HashSet<>();
for (DynamicCacheChangeRequest req : reqs) {
if (req.resetLostPartitions())
caches.add(req.cacheName());
}
if (!F.isEmpty(caches))
resetLostPartitions(caches);
}
GridDhtPartitionsSingleMessage m = cctx.exchange().createPartitionsSingleMessage(
node, exchangeId(), clientOnlyExchange, true);
if (exchangeOnChangeGlobalState && changeGlobalStateE != null)
m.setException(changeGlobalStateE);
if (log.isDebugEnabled())
log.debug("Sending local partitions [nodeId=" + node.id() + ", exchId=" + exchId + ", msg=" + m + ']');
try {
cctx.io().send(node, m, SYSTEM_POOL);
}
catch (ClusterTopologyCheckedException ignored) {
if (log.isDebugEnabled())
log.debug("Node left during partition exchange [nodeId=" + node.id() + ", exchId=" + exchId + ']');
}
}
/**
* @param nodes Nodes.
* @param compress {@code True} if it is possible to use compression for the message.
* @return Message.
*/
private GridDhtPartitionsFullMessage createPartitionsMessage(Collection<ClusterNode> nodes, boolean compress) {
GridCacheVersion last = lastVer.get();
GridDhtPartitionsFullMessage m = cctx.exchange().createPartitionsFullMessage(
nodes,
exchangeId(),
last != null ? last : cctx.versions().last(),
compress);
if (exchangeOnChangeGlobalState && !F.isEmpty(changeGlobalStateExceptions))
m.setExceptionsMap(changeGlobalStateExceptions);
return m;
}
/**
* @param nodes Nodes.
* @throws IgniteCheckedException If failed.
*/
private void sendAllPartitions(Collection<ClusterNode> nodes) throws IgniteCheckedException {
GridDhtPartitionsFullMessage m = createPartitionsMessage(nodes, true);
assert !nodes.contains(cctx.localNode());
if (log.isDebugEnabled())
log.debug("Sending full partition map [nodeIds=" + F.viewReadOnly(nodes, F.node2id()) +
", exchId=" + exchId + ", msg=" + m + ']');
cctx.io().safeSend(nodes, m, SYSTEM_POOL, null);
}
/**
* @param oldestNode Oldest node.
*/
private void sendPartitions(ClusterNode oldestNode) {
try {
sendLocalPartitions(oldestNode);
}
catch (ClusterTopologyCheckedException ignore) {
if (log.isDebugEnabled())
log.debug("Oldest node left during partition exchange [nodeId=" + oldestNode.id() +
", exchId=" + exchId + ']');
}
catch (IgniteCheckedException e) {
U.error(log, "Failed to send local partitions to oldest node (will retry after timeout) [oldestNodeId=" +
oldestNode.id() + ", exchId=" + exchId + ']', e);
}
}
/** {@inheritDoc} */
@Override public boolean onDone(@Nullable AffinityTopologyVersion res, @Nullable Throwable err) {
boolean realExchange = !dummy && !forcePreload;
if (err == null && realExchange) {
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
if (cacheCtx.isLocal())
continue;
try {
if (centralizedAff)
cacheCtx.topology().initPartitions(this);
}
catch (IgniteInterruptedCheckedException e) {
U.error(log, "Failed to initialize partitions.", e);
}
GridCacheContext drCacheCtx = cacheCtx.isNear() ? cacheCtx.near().dht().context() : cacheCtx;
if (drCacheCtx.isDrEnabled()) {
try {
drCacheCtx.dr().onExchange(topologyVersion(), exchId.isLeft());
}
catch (IgniteCheckedException e) {
U.error(log, "Failed to notify DR: " + e, e);
}
}
}
if (discoEvt.type() == EVT_NODE_LEFT ||
discoEvt.type() == EVT_NODE_FAILED ||
discoEvt.type() == EVT_NODE_JOINED)
detectLostPartitions();
Map<Integer, CacheValidation> m = new HashMap<>(cctx.cacheContexts().size());
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
Collection<Integer> lostParts = cacheCtx.isLocal() ?
Collections.<Integer>emptyList() : cacheCtx.topology().lostPartitions();
boolean valid = true;
if (cacheCtx.config().getTopologyValidator() != null && !CU.isSystemCache(cacheCtx.name()))
valid = cacheCtx.config().getTopologyValidator().validate(discoEvt.topologyNodes());
m.put(cacheCtx.cacheId(), new CacheValidation(valid, lostParts));
}
cacheValidRes = m;
}
cctx.cache().onExchangeDone(exchId.topologyVersion(), reqs, err);
cctx.exchange().onExchangeDone(this, err);
if (!F.isEmpty(reqs) && err == null) {
for (DynamicCacheChangeRequest req : reqs)
cctx.cache().completeStartFuture(req);
}
if (exchangeOnChangeGlobalState && err == null)
cctx.kernalContext().state().onExchangeDone();
if (super.onDone(res, err) && realExchange) {
if (log.isDebugEnabled())
log.debug("Completed partition exchange [localNode=" + cctx.localNodeId() + ", exchange= " + this +
", durationFromInit=" + (U.currentTimeMillis() - initTs) + ']');
initFut.onDone(err == null);
if (exchId.isLeft()) {
for (GridCacheContext cacheCtx : cctx.cacheContexts())
cacheCtx.config().getAffinity().removeNode(exchId.nodeId());
}
reqs = null;
if (discoEvt instanceof DiscoveryCustomEvent)
((DiscoveryCustomEvent)discoEvt).customMessage(null);
cctx.exchange().lastFinishedFuture(this);
return true;
}
return dummy;
}
/** {@inheritDoc} */
@Nullable @Override public Throwable validateCache(
GridCacheContext cctx,
boolean recovery,
boolean read,
@Nullable Object key,
@Nullable Collection<?> keys
) {
assert isDone() : this;
Throwable err = error();
if (err != null)
return err;
if (!cctx.shared().kernalContext().state().active())
return new CacheInvalidStateException(
"Failed to perform cache operation (cluster is not activated): " + cctx.name());
PartitionLossPolicy partLossPlc = cctx.config().getPartitionLossPolicy();
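// If the cache has lost partitions (needs recovery) and this is not a recovery operation,
// writes are rejected outright under read-only loss policies; per-key checks against lost
// partitions follow below.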
if (cctx.needsRecovery() && !recovery) {
if (!read && (partLossPlc == READ_ONLY_SAFE || partLossPlc == READ_ONLY_ALL))
return new IgniteCheckedException("Failed to write to cache (cache is moved to a read-only state): " +
cctx.name());
}
if (cctx.needsRecovery() || cctx.config().getTopologyValidator() != null) {
CacheValidation validation = cacheValidRes.get(cctx.cacheId());
if (validation == null)
return null;
if (!validation.valid && !read)
return new IgniteCheckedException("Failed to perform cache operation " +
"(cache topology is not valid): " + cctx.name());
if (recovery || !cctx.needsRecovery())
return null;
if (key != null) {
int p = cctx.affinity().partition(key);
CacheInvalidStateException ex = validatePartitionOperation(cctx.name(), read, key, p,
validation.lostParts, partLossPlc);
if (ex != null)
return ex;
}
if (keys != null) {
for (Object k : keys) {
int p = cctx.affinity().partition(k);
CacheInvalidStateException ex = validatePartitionOperation(cctx.name(), read, k, p,
validation.lostParts, partLossPlc);
if (ex != null)
return ex;
}
}
}
return null;
}
/**
* @param cacheName Cache name.
* @param read Read flag.
* @param key Key to check.
* @param part Partition this key belongs to.
* @param lostParts Collection of lost partitions.
* @param plc Partition loss policy.
* @return Invalid state exception if this operation is disallowed.
*/
private CacheInvalidStateException validatePartitionOperation(
String cacheName,
boolean read,
Object key,
int part,
Collection<Integer> lostParts,
PartitionLossPolicy plc
) {
if (lostParts.contains(part)) {
if (!read) {
assert plc == READ_WRITE_ALL || plc == READ_WRITE_SAFE;
if (plc == READ_WRITE_SAFE) {
return new CacheInvalidStateException("Failed to execute cache operation " +
"(all partition owners have left the grid, partition data has been lost) [" +
"cacheName=" + cacheName + ", part=" + part + ", key=" + key + ']');
}
}
else {
// Read.
if (plc == READ_ONLY_SAFE || plc == READ_WRITE_SAFE)
return new CacheInvalidStateException("Failed to execute cache operation " +
"(all partition owners have left the grid, partition data has been lost) [" +
"cacheName=" + cacheName + ", part=" + part + ", key=" + key + ']');
}
}
return null;
}
/**
* Cleans up resources to avoid excessive memory usage.
*/
public void cleanUp() {
singleMsgs.clear();
fullMsgs.clear();
changeGlobalStateExceptions.clear();
crd = null;
partReleaseFut = null;
changeGlobalStateE = null;
}
/**
* @param ver Version.
*/
private void updateLastVersion(GridCacheVersion ver) {
assert ver != null;
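// CAS loop: keep the greatest committed cache version seen so far.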
while (true) {
GridCacheVersion old = lastVer.get();
if (old == null || Long.compare(old.order(), ver.order()) < 0) {
if (lastVer.compareAndSet(old, ver))
break;
}
else
break;
}
}
/**
* @param node Sender node.
* @param msg Single partition info.
*/
public void onReceive(final ClusterNode node, final GridDhtPartitionsSingleMessage msg) {
assert msg != null;
assert msg.exchangeId().equals(exchId) : msg;
assert msg.lastVersion() != null : msg;
if (!msg.client())
updateLastVersion(msg.lastVersion());
if (isDone()) {
if (log.isDebugEnabled())
log.debug("Received message for finished future (will reply only to sender) [msg=" + msg +
", fut=" + this + ']');
if (!centralizedAff)
sendAllPartitions(node.id(), cctx.gridConfig().getNetworkSendRetryCount());
}
else {
initFut.listen(new CI1<IgniteInternalFuture<Boolean>>() {
@Override public void apply(IgniteInternalFuture<Boolean> f) {
try {
if (!f.get())
return;
}
catch (IgniteCheckedException e) {
U.error(log, "Failed to initialize exchange future: " + GridDhtPartitionsExchangeFuture.this, e);
return;
}
processMessage(node, msg);
}
});
}
}
/**
* @param node Sender node.
* @param msg Message.
*/
private void processMessage(ClusterNode node, GridDhtPartitionsSingleMessage msg) {
boolean allReceived = false;
boolean updateSingleMap = false;
synchronized (this) {
assert crd != null;
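// Only the coordinator collects single messages from all nodes; a non-coordinator stores
// the message in case it later has to take over as coordinator (see onNodeLeft()).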
if (crd.isLocal()) {
if (remaining.remove(node.id())) {
updateSingleMap = true;
pendingSingleUpdates++;
if (exchangeOnChangeGlobalState && msg.getException() != null)
changeGlobalStateExceptions.put(node.id(), msg.getException());
allReceived = remaining.isEmpty();
}
}
else
singleMsgs.put(node, msg);
}
if (updateSingleMap) {
try {
updatePartitionSingleMap(node, msg);
}
finally {
synchronized (this) {
assert pendingSingleUpdates > 0;
pendingSingleUpdates--;
if (pendingSingleUpdates == 0)
notifyAll();
}
}
}
if (allReceived) {
awaitSingleMapUpdates();
onAllReceived();
}
}
/**
*
*/
private synchronized void awaitSingleMapUpdates() {
try {
while (pendingSingleUpdates > 0)
U.wait(this);
}
catch (IgniteInterruptedCheckedException e) {
U.warn(log, "Failed to wait for partition map updates, thread was interrupted: " + e);
}
}
/**
* @param fut Affinity future.
*/
private void onAffinityInitialized(IgniteInternalFuture<Map<Integer, Map<Integer, List<UUID>>>> fut) {
try {
assert fut.isDone();
Map<Integer, Map<Integer, List<UUID>>> assignmentChange = fut.get();
GridDhtPartitionsFullMessage m = createPartitionsMessage(null, false);
CacheAffinityChangeMessage msg = new CacheAffinityChangeMessage(exchId, m, assignmentChange);
if (log.isDebugEnabled())
log.debug("Centralized affinity exchange, send affinity change message: " + msg);
cctx.discovery().sendCustomEvent(msg);
}
catch (IgniteCheckedException e) {
onDone(e);
}
}
/**
* @param top Topology to assign partition states for.
*/
private void assignPartitionStates(GridDhtPartitionTopology top) {
Map<Integer, CounterWithNodes> maxCntrs = new HashMap<>();
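// For each partition, find the maximum update counter among OWNING copies reported by
// remote nodes; every node that reported that counter becomes a candidate owner.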
for (Map.Entry<UUID, GridDhtPartitionsAbstractMessage> e : msgs.entrySet()) {
assert e.getValue().partitionUpdateCounters(top.cacheId()) != null;
for (Map.Entry<Integer, T2<Long, Long>> e0 : e.getValue().partitionUpdateCounters(top.cacheId()).entrySet()) {
int p = e0.getKey();
UUID uuid = e.getKey();
GridDhtPartitionState state = top.partitionState(uuid, p);
if (state != GridDhtPartitionState.OWNING)
continue;
Long cntr = e0.getValue().get1();
if (cntr == null)
continue;
CounterWithNodes maxCntr = maxCntrs.get(p);
if (maxCntr == null || cntr > maxCntr.cnt)
maxCntrs.put(p, new CounterWithNodes(cntr, uuid));
else if (cntr == maxCntr.cnt)
maxCntr.nodes.add(uuid);
}
}
// Also must process counters from the local node.
for (GridDhtLocalPartition part : top.currentLocalPartitions()) {
GridDhtPartitionState state = top.partitionState(cctx.localNodeId(), part.id());
if (state != GridDhtPartitionState.OWNING)
continue;
CounterWithNodes maxCntr = maxCntrs.get(part.id());
if (maxCntr == null || part.initialUpdateCounter() > maxCntr.cnt)
maxCntrs.put(part.id(), new CounterWithNodes(part.updateCounter(), cctx.localNodeId()));
else if (part.initialUpdateCounter() == maxCntr.cnt)
maxCntr.nodes.add(cctx.localNodeId());
}
int entryLeft = maxCntrs.size();
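// Assign owners for each partition. Entries with a zero max counter are skipped, except the
// last processed entry, which is passed to setOwners() with the last-entry flag ('entryLeft == 0').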
for (Map.Entry<Integer, CounterWithNodes> e : maxCntrs.entrySet()) {
int p = e.getKey();
long maxCntr = e.getValue().cnt;
entryLeft--;
if (entryLeft != 0 && maxCntr == 0)
continue;
top.setOwners(p, e.getValue().nodes, entryLeft == 0);
}
}
/**
* Detect lost partitions.
*/
private void detectLostPartitions() {
synchronized (cctx.exchange().interruptLock()) {
if (Thread.currentThread().isInterrupted())
return;
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
if (!cacheCtx.isLocal())
cacheCtx.topology().detectLostPartitions(discoEvt);
}
}
}
/**
* @param cacheNames Cache names to reset lost partitions for.
*/
private void resetLostPartitions(Collection<String> cacheNames) {
synchronized (cctx.exchange().interruptLock()) {
if (Thread.currentThread().isInterrupted())
return;
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
if (!cacheCtx.isLocal() && cacheNames.contains(cacheCtx.name()))
cacheCtx.topology().resetLostPartitions();
}
}
}
/**
*
*/
private void onAllReceived() {
try {
assert crd.isLocal();
if (!crd.equals(discoCache.serverNodes().get(0))) {
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
if (!cacheCtx.isLocal())
cacheCtx.topology().beforeExchange(this, !centralizedAff);
}
}
if (discoEvt.type() == EVT_NODE_JOINED) {
if (cctx.kernalContext().state().active())
assignPartitionsStates();
}
else if (discoEvt.type() == EVT_DISCOVERY_CUSTOM_EVT) {
assert discoEvt instanceof DiscoveryCustomEvent;
if (((DiscoveryCustomEvent)discoEvt).customMessage() instanceof DynamicCacheChangeBatch) {
DynamicCacheChangeBatch batch = (DynamicCacheChangeBatch)((DiscoveryCustomEvent)discoEvt)
.customMessage();
Set<String> caches = new HashSet<>();
for (DynamicCacheChangeRequest req : batch.requests()) {
if (req.resetLostPartitions())
caches.add(req.cacheName());
else if (req.globalStateChange() && req.state() != ClusterState.INACTIVE)
assignPartitionsStates();
}
if (!F.isEmpty(caches))
resetLostPartitions(caches);
}
}
else if (discoEvt.type() == EVT_NODE_LEFT || discoEvt.type() == EVT_NODE_FAILED)
detectLostPartitions();
updateLastVersion(cctx.versions().last());
cctx.versions().onExchange(lastVer.get().order());
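// With centralized affinity (a server node left), the coordinator recalculates affinity and
// broadcasts it with a CacheAffinityChangeMessage; otherwise the full partition map is sent
// to the remaining server nodes directly.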
if (centralizedAff) {
IgniteInternalFuture<Map<Integer, Map<Integer, List<UUID>>>> fut = cctx.affinity().initAffinityOnNodeLeft(this);
if (!fut.isDone()) {
fut.listen(new IgniteInClosure<IgniteInternalFuture<Map<Integer, Map<Integer, List<UUID>>>>>() {
@Override public void apply(IgniteInternalFuture<Map<Integer, Map<Integer, List<UUID>>>> fut) {
onAffinityInitialized(fut);
}
});
}
else
onAffinityInitialized(fut);
}
else {
List<ClusterNode> nodes;
synchronized (this) {
srvNodes.remove(cctx.localNode());
nodes = new ArrayList<>(srvNodes);
}
if (!nodes.isEmpty())
sendAllPartitions(nodes);
if (exchangeOnChangeGlobalState && !F.isEmpty(changeGlobalStateExceptions))
cctx.kernalContext().state().onFullResponseMessage(changeGlobalStateExceptions);
onDone(exchangeId().topologyVersion());
}
}
catch (IgniteCheckedException e) {
if (reconnectOnError(e))
onDone(new IgniteNeedReconnectException(cctx.localNode(), e));
else
onDone(e);
}
}
/**
*
*/
private void assignPartitionsStates() {
if (cctx.database().persistenceEnabled()) {
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
if (cacheCtx.isLocal())
continue;
assignPartitionStates(cacheCtx.topology());
}
}
}
/**
* @param nodeId Node ID.
* @param retryCnt Number of retries.
*/
private void sendAllPartitions(final UUID nodeId, final int retryCnt) {
ClusterNode n = cctx.node(nodeId);
try {
if (n != null)
sendAllPartitions(F.asList(n));
}
catch (IgniteCheckedException e) {
if (e instanceof ClusterTopologyCheckedException || !cctx.discovery().alive(n)) {
if (log.isDebugEnabled())
log.debug("Failed to send full partition map to node, node left grid " +
"[rmtNode=" + nodeId + ", exchangeId=" + exchId + ']');
return;
}
if (reconnectOnError(e)) {
onDone(new IgniteNeedReconnectException(cctx.localNode(), e));
return;
}
if (retryCnt > 0) {
long timeout = cctx.gridConfig().getNetworkSendRetryDelay();
LT.error(log, e, "Failed to send full partition map to node (will retry after timeout) " +
"[node=" + nodeId + ", exchangeId=" + exchId + ", timeout=" + timeout + ']');
cctx.time().addTimeoutObject(new GridTimeoutObjectAdapter(timeout) {
@Override public void onTimeout() {
sendAllPartitions(nodeId, retryCnt - 1);
}
});
}
else
U.error(log, "Failed to send full partition map [node=" + n + ", exchangeId=" + exchId + ']', e);
}
}
/**
* @param node Sender node.
* @param msg Full partition info.
*/
public void onReceive(final ClusterNode node, final GridDhtPartitionsFullMessage msg) {
assert msg != null;
final UUID nodeId = node.id();
if (isDone()) {
if (log.isDebugEnabled())
log.debug("Received message for finished future [msg=" + msg + ", fut=" + this + ']');
return;
}
if (log.isDebugEnabled())
log.debug("Received full partition map from node [nodeId=" + nodeId + ", msg=" + msg + ']');
initFut.listen(new CI1<IgniteInternalFuture<Boolean>>() {
@Override public void apply(IgniteInternalFuture<Boolean> f) {
try {
if (!f.get())
return;
}
catch (IgniteCheckedException e) {
U.error(log, "Failed to initialize exchange future: " + GridDhtPartitionsExchangeFuture.this, e);
return;
}
processMessage(node, msg);
}
});
}
/**
* @param node Sender node.
* @param msg Message.
*/
private void processMessage(ClusterNode node, GridDhtPartitionsFullMessage msg) {
assert msg.exchangeId().equals(exchId) : msg;
assert msg.lastVersion() != null : msg;
synchronized (this) {
if (crd == null)
return;
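// A full map from a node other than the known coordinator is kept only if that node is newer:
// it may become the coordinator once the departure of older nodes is processed locally
// (see onNodeLeft()).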
if (!crd.equals(node)) {
if (log.isDebugEnabled())
log.debug("Received full partition map from unexpected node [oldest=" + crd.id() +
", nodeId=" + node.id() + ']');
if (node.order() > crd.order())
fullMsgs.put(node, msg);
return;
}
}
updatePartitionFullMap(msg);
if (exchangeOnChangeGlobalState && !F.isEmpty(msg.getExceptionsMap()))
cctx.kernalContext().state().onFullResponseMessage(msg.getExceptionsMap());
onDone(exchId.topologyVersion());
}
/**
* Updates partition map in all caches.
*
* @param msg Partitions full messages.
*/
private void updatePartitionFullMap(GridDhtPartitionsFullMessage msg) {
cctx.versions().onExchange(msg.lastVersion().order());
for (Map.Entry<Integer, GridDhtPartitionFullMap> entry : msg.partitions().entrySet()) {
Integer cacheId = entry.getKey();
Map<Integer, T2<Long, Long>> cntrMap = msg.partitionUpdateCounters(cacheId);
GridCacheContext cacheCtx = cctx.cacheContext(cacheId);
if (cacheCtx != null)
cacheCtx.topology().update(exchId, entry.getValue(), cntrMap);
else {
ClusterNode oldest = cctx.discovery().oldestAliveCacheServerNode(AffinityTopologyVersion.NONE);
if (oldest != null && oldest.isLocal())
cctx.exchange().clientTopology(cacheId, this).update(exchId, entry.getValue(), cntrMap);
}
}
}
/**
* Updates partition map in all caches.
*
* @param node Sender node.
* @param msg Partitions single message.
*/
private void updatePartitionSingleMap(ClusterNode node, GridDhtPartitionsSingleMessage msg) {
msgs.put(node.id(), msg);
for (Map.Entry<Integer, GridDhtPartitionMap> entry : msg.partitions().entrySet()) {
Integer cacheId = entry.getKey();
GridCacheContext cacheCtx = cctx.cacheContext(cacheId);
GridDhtPartitionTopology top = cacheCtx != null ? cacheCtx.topology() :
cctx.exchange().clientTopology(cacheId, this);
top.update(exchId, entry.getValue(), msg.partitionUpdateCounters(cacheId));
}
}
/**
* Affinity change message callback, processed from the same thread as {@link #onNodeLeft}.
*
* @param node Message sender node.
* @param msg Message.
*/
public void onAffinityChangeMessage(final ClusterNode node, final CacheAffinityChangeMessage msg) {
assert exchId.equals(msg.exchangeId()) : msg;
onDiscoveryEvent(new IgniteRunnable() {
@Override public void run() {
if (isDone() || !enterBusy())
return;
try {
assert centralizedAff;
if (crd.equals(node)) {
cctx.affinity().onExchangeChangeAffinityMessage(GridDhtPartitionsExchangeFuture.this,
crd.isLocal(),
msg);
if (!crd.isLocal()) {
GridDhtPartitionsFullMessage partsMsg = msg.partitionsMessage();
assert partsMsg != null : msg;
assert partsMsg.lastVersion() != null : partsMsg;
updatePartitionFullMap(partsMsg);
}
onDone(topologyVersion());
}
else {
if (log.isDebugEnabled()) {
log.debug("Ignore affinity change message, coordinator changed [node=" + node.id() +
", crd=" + crd.id() +
", msg=" + msg +
']');
}
}
}
finally {
leaveBusy();
}
}
});
}
/**
* @param c Closure.
*/
private void onDiscoveryEvent(IgniteRunnable c) {
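// Discovery callbacks arriving before the future is initialized are queued and replayed from initDone().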
synchronized (discoEvts) {
if (!init) {
discoEvts.add(c);
return;
}
assert discoEvts.isEmpty() : discoEvts;
}
c.run();
}
/**
*
*/
private void initDone() {
while (!isDone()) {
List<IgniteRunnable> evts;
synchronized (discoEvts) {
if (discoEvts.isEmpty()) {
init = true;
break;
}
evts = new ArrayList<>(discoEvts);
discoEvts.clear();
}
for (IgniteRunnable c : evts)
c.run();
}
initFut.onDone(true);
}
/**
* Node left callback, processed from the same thread as {@link #onAffinityChangeMessage}.
*
* @param node Left node.
*/
public void onNodeLeft(final ClusterNode node) {
if (isDone() || !enterBusy())
return;
cctx.mvcc().removeExplicitNodeLocks(node.id(), topologyVersion());
try {
onDiscoveryEvent(new IgniteRunnable() {
@Override public void run() {
if (isDone() || !enterBusy())
return;
try {
boolean crdChanged = false;
boolean allReceived = false;
Set<UUID> reqFrom = null;
ClusterNode crd0;
discoCache.updateAlives(node);
synchronized (GridDhtPartitionsExchangeFuture.this) {
if (!srvNodes.remove(node))
return;
boolean rmvd = remaining.remove(node.id());
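// If the left node was the coordinator, the next oldest server takes over; a new local
// coordinator may have to request single messages from nodes that already replied to the
// previous coordinator.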
if (node.equals(crd)) {
crdChanged = true;
crd = !srvNodes.isEmpty() ? srvNodes.get(0) : null;
}
if (crd != null && crd.isLocal()) {
if (rmvd)
allReceived = remaining.isEmpty();
if (crdChanged && !remaining.isEmpty())
reqFrom = new HashSet<>(remaining);
}
crd0 = crd;
}
if (crd0 == null) {
assert cctx.kernalContext().clientNode() || cctx.localNode().isDaemon() : cctx.localNode();
List<ClusterNode> empty = Collections.emptyList();
for (GridCacheContext cacheCtx : cctx.cacheContexts()) {
List<List<ClusterNode>> affAssignment = new ArrayList<>(cacheCtx.affinity().partitions());
for (int i = 0; i < cacheCtx.affinity().partitions(); i++)
affAssignment.add(empty);
cacheCtx.affinity().affinityCache().initialize(topologyVersion(), affAssignment);
}
onDone(topologyVersion());
return;
}
if (crd0.isLocal()) {
if (exchangeOnChangeGlobalState && changeGlobalStateE != null)
changeGlobalStateExceptions.put(crd0.id(), changeGlobalStateE);
if (allReceived) {
awaitSingleMapUpdates();
onAllReceived();
return;
}
if (crdChanged && reqFrom != null) {
GridDhtPartitionsSingleRequest req = new GridDhtPartitionsSingleRequest(exchId);
for (UUID nodeId : reqFrom) {
try {
// It is possible that some nodes finished exchange with previous coordinator.
cctx.io().send(nodeId, req, SYSTEM_POOL);
}
catch (ClusterTopologyCheckedException ignored) {
if (log.isDebugEnabled())
log.debug("Node left during partition exchange [nodeId=" + nodeId +
", exchId=" + exchId + ']');
}
catch (IgniteCheckedException e) {
U.error(log, "Failed to request partitions from node: " + nodeId, e);
}
}
}
for (Map.Entry<ClusterNode, GridDhtPartitionsSingleMessage> m : singleMsgs.entrySet())
processMessage(m.getKey(), m.getValue());
}
else {
if (crdChanged) {
sendPartitions(crd0);
for (Map.Entry<ClusterNode, GridDhtPartitionsFullMessage> m : fullMsgs.entrySet())
processMessage(m.getKey(), m.getValue());
}
}
}
catch (Exception e) {
if (reconnectOnError(e))
onDone(new IgniteNeedReconnectException(cctx.localNode(), e));
else
throw e;
}
finally {
leaveBusy();
}
}
});
}
finally {
leaveBusy();
}
}
/**
* @param e Exception.
* @return {@code True} if local node should try reconnect in case of error.
*/
public boolean reconnectOnError(Throwable e) {
return X.hasCause(e, IOException.class, IgniteClientDisconnectedCheckedException.class) &&
cctx.discovery().reconnectSupported();
}
/** {@inheritDoc} */
@Override public int compareTo(GridDhtPartitionsExchangeFuture fut) {
return exchId.compareTo(fut.exchId);
}
/** {@inheritDoc} */
@Override public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
GridDhtPartitionsExchangeFuture fut = (GridDhtPartitionsExchangeFuture)o;
return exchId.equals(fut.exchId);
}
/** {@inheritDoc} */
@Override public int hashCode() {
return exchId.hashCode();
}
/**
*
*/
enum ExchangeType {
/** */
CLIENT,
/** */
ALL,
/** */
NONE
}
/**
* Cache validation result.
*/
private static class CacheValidation {
/** Topology validation result. */
private boolean valid;
/** Lost partitions on this topology version. */
private Collection<Integer> lostParts;
/**
* @param valid Valid flag.
* @param lostParts Lost partitions.
*/
private CacheValidation(boolean valid, Collection<Integer> lostParts) {
this.valid = valid;
this.lostParts = lostParts;
}
}
/** {@inheritDoc} */
@Override public String toString() {
Set<UUID> remaining;
List<ClusterNode> srvNodes;
synchronized (this) {
remaining = new HashSet<>(this.remaining);
srvNodes = this.srvNodes != null ? new ArrayList<>(this.srvNodes) : null;
}
return S.toString(GridDhtPartitionsExchangeFuture.class, this,
"evtLatch", evtLatch == null ? "null" : evtLatch.getCount(),
"remaining", remaining,
"srvNodes", srvNodes,
"super", super.toString());
}
/**
*
*/
private static class CounterWithNodes {
/** */
private final long cnt;
/** */
private final Set<UUID> nodes = new HashSet<>();
/**
* @param cnt Count.
* @param firstNode Node ID.
*/
private CounterWithNodes(long cnt, UUID firstNode) {
this.cnt = cnt;
nodes.add(firstNode);
}
/** {@inheritDoc} */
@Override public String toString() {
return S.toString(CounterWithNodes.class, this);
}
}
}