/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite.internal.processors.cache.distributed.dht.preloader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.UUID;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.IgniteLogger;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.internal.cluster.ClusterTopologyCheckedException;
import org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion;
import org.apache.ignite.internal.processors.cache.GridCacheContext;
import org.apache.ignite.internal.processors.cache.GridCacheEntryInfo;
import org.apache.ignite.internal.processors.cache.IgniteRebalanceIterator;
import org.apache.ignite.internal.processors.cache.database.CacheDataRow;
import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtLocalPartition;
import org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopology;
import org.apache.ignite.internal.util.lang.GridCloseableIterator;
import org.apache.ignite.internal.util.tostring.GridToStringExclude;
import org.apache.ignite.internal.util.typedef.T3;
import org.apache.ignite.internal.util.typedef.internal.S;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.apache.ignite.lang.IgnitePredicate;
import org.apache.ignite.spi.IgniteSpiException;
import static org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionState.OWNING;
/**
* Supplies partitions to demanding nodes during rebalancing.
*/
class GridDhtPartitionSupplier {
/** Cache context. */
private final GridCacheContext<?, ?> cctx;
/** Logger. */
private final IgniteLogger log;
/** Partition topology. */
private GridDhtPartitionTopology top;
/** Deployment enabled flag. */
private final boolean depEnabled;
/** Preload predicate. */
private IgnitePredicate<GridCacheEntryInfo> preloadPred;
/** Supply context map. T3: nodeId, idx, topVer. */
private final Map<T3<UUID, Integer, AffinityTopologyVersion>, SupplyContext> scMap = new HashMap<>();
/**
* @param cctx Cache context.
*/
GridDhtPartitionSupplier(GridCacheContext<?, ?> cctx) {
assert cctx != null;
this.cctx = cctx;
log = cctx.logger(getClass());
top = cctx.dht().topology();
depEnabled = cctx.gridDeploy().enabled();
}
/**
* Clears all supply contexts and releases the partitions they reserved.
*/
void stop() {
synchronized (scMap) {
Iterator<T3<UUID, Integer, AffinityTopologyVersion>> it = scMap.keySet().iterator();
while (it.hasNext()) {
T3<UUID, Integer, AffinityTopologyVersion> t = it.next();
clearContext(scMap.get(t), log);
it.remove();
}
}
}
/**
* Clears the given supply context: closes its entry iterator and releases the reserved partition.
*
* @param sc Supply context.
* @param log Logger.
*/
private static void clearContext(
final SupplyContext sc,
final IgniteLogger log) {
if (sc != null) {
final Iterator<?> it = sc.entryIt;
if (it instanceof GridCloseableIterator && !((GridCloseableIterator<?>)it).isClosed()) {
try {
((GridCloseableIterator<?>)it).close();
}
catch (IgniteCheckedException e) {
U.error(log, "Iterator close failed.", e);
}
}
final GridDhtLocalPartition loc = sc.loc;
if (loc != null) {
assert loc.reservations() > 0;
loc.release();
}
}
}
/**
* Handles new topology.
*
* @param topVer Topology version.
*/
@SuppressWarnings("ConstantConditions")
public void onTopologyChanged(AffinityTopologyVersion topVer) {
synchronized (scMap) {
Iterator<T3<UUID, Integer, AffinityTopologyVersion>> it = scMap.keySet().iterator();
while (it.hasNext()) {
T3<UUID, Integer, AffinityTopologyVersion> t = it.next();
if (topVer.compareTo(t.get3()) > 0) { // Clear all obsolete contexts.
clearContext(scMap.get(t), log);
it.remove();
if (log.isDebugEnabled())
log.debug("Supply context removed [node=" + t.get1() + "]");
}
}
}
}
/**
* Sets preload predicate for supply pool.
*
* @param preloadPred Preload predicate.
*/
void preloadPredicate(IgnitePredicate<GridCacheEntryInfo> preloadPred) {
this.preloadPred = preloadPred;
}
/**
* Handles a demand message from a node requesting partition data and replies with supply messages.
*
* @param idx Message topic index.
* @param id Demander node id.
* @param d Demand message.
*/
@SuppressWarnings("unchecked")
public void handleDemandMessage(int idx, UUID id, GridDhtPartitionDemandMessage d) {
assert d != null;
assert id != null;
AffinityTopologyVersion curTop = cctx.affinity().affinityTopologyVersion();
AffinityTopologyVersion demTop = d.topologyVersion();
T3<UUID, Integer, AffinityTopologyVersion> scId = new T3<>(id, idx, demTop);
if (d.updateSequence() == -1) { // Demander requested context cleanup.
synchronized (scMap) {
clearContext(scMap.remove(scId), log);
return;
}
}
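// Ignore demands issued for an older topology version than the current one.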
if (curTop.compareTo(demTop) > 0) {
if (log.isDebugEnabled())
log.debug("Demand request cancelled [current=" + curTop + ", demanded=" + demTop +
", from=" + id + ", idx=" + idx + "]");
return;
}
if (log.isDebugEnabled())
log.debug("Demand request accepted [current=" + curTop + ", demanded=" + demTop +
", from=" + id + ", idx=" + idx + "]");
GridDhtPartitionSupplyMessage s = new GridDhtPartitionSupplyMessage(
d.updateSequence(), cctx.cacheId(), d.topologyVersion(), cctx.deploymentEnabled());
ClusterNode node = cctx.discovery().node(id);
if (node == null)
return; // Context will be cleaned at topology change.
try {
SupplyContext sctx;
synchronized (scMap) {
sctx = scMap.remove(scId);
assert sctx == null || d.updateSequence() == sctx.updateSeq;
}
// The initial demand request must contain the list of partitions to supply.
if (sctx == null && d.partitions() == null)
return;
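// A saved context and a partition list are mutually exclusive: the list comes only with the initial request.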
assert !(sctx != null && d.partitions() != null);
long bCnt = 0;
SupplyContextPhase phase = SupplyContextPhase.NEW;
boolean newReq = true;
long maxBatchesCnt = cctx.config().getRebalanceBatchesPrefetchCount();
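// A saved context means this demand continues a previous supply iteration,
// so only one batch is sent per continuation request.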
if (sctx != null) {
phase = sctx.phase;
maxBatchesCnt = 1;
}
else {
if (log.isDebugEnabled())
log.debug("Starting supplying rebalancing [cache=" + cctx.name() +
", fromNode=" + node.id() + ", partitionsCount=" + d.partitions().size() +
", topology=" + d.topologyVersion() + ", updateSeq=" + d.updateSequence() +
", idx=" + idx + "]");
}
Iterator<Integer> partIt = sctx != null ? sctx.partIt : d.partitions().iterator();
while ((sctx != null && newReq) || partIt.hasNext()) {
int part = sctx != null && newReq ? sctx.part : partIt.next();
newReq = false;
GridDhtLocalPartition loc;
if (sctx != null && sctx.loc != null) {
loc = sctx.loc;
assert loc.reservations() > 0;
}
else {
loc = top.localPartition(part, d.topologyVersion(), false);
if (loc == null || loc.state() != OWNING || !loc.reserve()) {
// Mark the partition as missed to let the demander know that
// this node is no longer an owner.
s.missed(part);
if (log.isDebugEnabled())
log.debug("Requested partition is not owned by local node [part=" + part +
", demander=" + id + ']');
continue;
}
}
try {
boolean partMissing = false;
if (phase == SupplyContextPhase.NEW)
phase = SupplyContextPhase.OFFHEAP;
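// OFFHEAP phase: stream partition entries from the off-heap rebalance iterator.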
if (phase == SupplyContextPhase.OFFHEAP) {
IgniteRebalanceIterator iter;
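// Create a fresh rebalance iterator for an initial request, or resume the iterator saved in the context.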
if (sctx == null || sctx.entryIt == null) {
iter = cctx.offheap().rebalanceIterator(part, d.topologyVersion(), d.partitionCounter(part));
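// Full (non-historical) rebalance: tell the demander to clear the partition before applying entries.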
if (!iter.historical())
s.clean(part);
}
else
iter = (IgniteRebalanceIterator)sctx.entryIt;
while (iter.hasNext()) {
if (!cctx.affinity().partitionBelongs(node, part, d.topologyVersion())) {
// Demander no longer needs this partition,
// so mark it as missed and move on.
s.missed(part);
if (log.isDebugEnabled())
log.debug("Demanding node does not need requested partition " +
"[part=" + part + ", nodeId=" + id + ']');
partMissing = true;
if (sctx != null) {
sctx = new SupplyContext(
phase,
partIt,
null,
part,
loc,
d.updateSequence());
}
break;
}
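// Flush the supply message once it reaches the configured batch size: either save the
// context and wait for the next demand (prefetch limit reached) or send and continue.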
if (s.messageSize() >= cctx.config().getRebalanceBatchSize()) {
if (++bCnt >= maxBatchesCnt) {
saveSupplyContext(scId,
phase,
partIt,
part,
iter,
loc,
d.topologyVersion(),
d.updateSequence());
loc = null;
reply(node, d, s, scId);
return;
}
else {
if (!reply(node, d, s, scId))
return;
s = new GridDhtPartitionSupplyMessage(d.updateSequence(),
cctx.cacheId(),
d.topologyVersion(),
cctx.deploymentEnabled());
}
}
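// Convert the data row into an entry info object and add it to the supply message.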
CacheDataRow row = iter.next();
GridCacheEntryInfo info = new GridCacheEntryInfo();
info.key(row.key());
info.expireTime(row.expireTime());
info.version(row.version());
info.value(row.value());
if (preloadPred == null || preloadPred.apply(info))
s.addEntry0(part, info, cctx);
else {
if (log.isDebugEnabled())
log.debug("Rebalance predicate evaluated to false (will not send " +
"cache entry): " + info);
continue;
}
// Need to manually prepare cache message.
// TODO GG-11141.
// if (depEnabled && !prepared) {
// ClassLoader ldr = swapEntry.keyClassLoaderId() != null ?
// cctx.deploy().getClassLoader(swapEntry.keyClassLoaderId()) :
// swapEntry.valueClassLoaderId() != null ?
// cctx.deploy().getClassLoader(swapEntry.valueClassLoaderId()) :
// null;
//
// if (ldr == null)
// continue;
//
// if (ldr instanceof GridDeploymentInfo) {
// s.prepare((GridDeploymentInfo)ldr);
//
// prepared = true;
// }
// }
}
if (partMissing)
continue;
}
// Mark as last supply message.
s.last(part);
phase = SupplyContextPhase.NEW;
sctx = null;
}
finally {
if (loc != null)
loc.release();
}
}
reply(node, d, s, scId);
if (log.isDebugEnabled())
log.debug("Finished supplying rebalancing [cache=" + cctx.name() +
", fromNode=" + node.id() +
", topology=" + d.topologyVersion() + ", updateSeq=" + d.updateSequence() +
", idx=" + idx + "]");
}
catch (IgniteCheckedException e) {
U.error(log, "Failed to send partition supply message to node: " + id, e);
}
catch (IgniteSpiException e) {
if (log.isDebugEnabled())
log.debug("Failed to send message to node (current node is stopping?) [node=" + node.id() +
", msg=" + e.getMessage() + ']');
}
}
/**
* @param n Node.
* @param d Demand message.
* @param s Supply message.
* @param scId Supply context id.
* @return {@code True} if message was sent, {@code false} if recipient left grid.
* @throws IgniteCheckedException If failed.
*/
private boolean reply(ClusterNode n,
GridDhtPartitionDemandMessage d,
GridDhtPartitionSupplyMessage s,
T3<UUID, Integer, AffinityTopologyVersion> scId)
throws IgniteCheckedException {
try {
if (log.isDebugEnabled())
log.debug("Replying to partition demand [node=" + n.id() + ", demand=" + d + ", supply=" + s + ']');
cctx.io().sendOrderedMessage(n, d.topic(), s, cctx.ioPolicy(), d.timeout());
// Throttle preloading.
if (cctx.config().getRebalanceThrottle() > 0)
U.sleep(cctx.config().getRebalanceThrottle());
return true;
}
catch (ClusterTopologyCheckedException ignore) {
if (log.isDebugEnabled())
log.debug("Failed to send partition supply message because node left grid: " + n.id());
synchronized (scMap) {
clearContext(scMap.remove(scId), log);
}
return false;
}
}
/**
* @param t Supply context id (nodeId, idx, topVer).
* @param phase Phase.
* @param partIt Partition iterator.
* @param part Partition.
* @param entryIt Entry iterator.
* @param loc Reserved local partition.
* @param topVer Topology version the demand was issued for.
* @param updateSeq Update sequence.
*/
private void saveSupplyContext(
T3<UUID, Integer, AffinityTopologyVersion> t,
SupplyContextPhase phase,
Iterator<Integer> partIt,
int part,
Iterator<?> entryIt,
GridDhtLocalPartition loc,
AffinityTopologyVersion topVer,
long updateSeq) {
synchronized (scMap) {
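// Save the context only if the topology has not changed since the demand was issued;
// otherwise release the partition reservation instead of keeping a stale context.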
if (cctx.affinity().affinityTopologyVersion().equals(topVer)) {
assert scMap.get(t) == null;
scMap.put(t,
new SupplyContext(phase,
partIt,
entryIt,
part,
loc,
updateSeq));
}
else if (loc != null) {
assert loc.reservations() > 0;
loc.release();
}
}
}
/**
* Supply context phase.
*/
private enum SupplyContextPhase {
/** Initial phase for a partition. */
NEW,
/** Supplying entries from the off-heap rebalance iterator. */
OFFHEAP
}
/**
* Supply context.
*/
private static class SupplyContext {
/** Phase. */
private final SupplyContextPhase phase;
/** Partition iterator. */
@GridToStringExclude
private final Iterator<Integer> partIt;
/** Entry iterator. */
@GridToStringExclude
private final Iterator<?> entryIt;
/** Partition. */
private final int part;
/** Local partition. */
private final GridDhtLocalPartition loc;
/** Update seq. */
private final long updateSeq;
/**
* @param phase Phase.
* @param partIt Partition iterator.
* @param entryIt Entry iterator.
* @param part Partition.
* @param loc Reserved local partition.
* @param updateSeq Update sequence.
*/
public SupplyContext(SupplyContextPhase phase,
Iterator<Integer> partIt,
Iterator<?> entryIt,
int part,
GridDhtLocalPartition loc,
long updateSeq) {
this.phase = phase;
this.partIt = partIt;
this.entryIt = entryIt;
this.part = part;
this.loc = loc;
this.updateSeq = updateSeq;
}
/** {@inheritDoc} */
@Override public String toString() {
return S.toString(SupplyContext.class, this);
}
}
/**
* Dumps debug information.
*/
public void dumpDebugInfo() {
synchronized (scMap) {
if (!scMap.isEmpty()) {
U.warn(log, "Rebalancing supplier reserved the following partitions:");
for (SupplyContext sc : scMap.values()) {
if (sc.loc != null)
U.warn(log, ">>> " + sc.loc);
}
}
}
}
}