/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite.internal.processors.cache.distributed.dht;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.IgniteLogger;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.cluster.ClusterTopologyCheckedException;
import org.apache.ignite.internal.cluster.ClusterTopologyServerNotFoundException;
import org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion;
import org.apache.ignite.internal.processors.cache.CacheObject;
import org.apache.ignite.internal.processors.cache.EntryGetResult;
import org.apache.ignite.internal.processors.cache.GridCacheContext;
import org.apache.ignite.internal.processors.cache.GridCacheEntryEx;
import org.apache.ignite.internal.processors.cache.GridCacheEntryInfo;
import org.apache.ignite.internal.processors.cache.GridCacheEntryRemovedException;
import org.apache.ignite.internal.processors.cache.GridCacheMessage;
import org.apache.ignite.internal.processors.cache.IgniteCacheExpiryPolicy;
import org.apache.ignite.internal.processors.cache.KeyCacheObject;
import org.apache.ignite.internal.processors.cache.database.CacheDataRow;
import org.apache.ignite.internal.processors.cache.distributed.near.GridNearGetRequest;
import org.apache.ignite.internal.processors.cache.distributed.near.GridNearGetResponse;
import org.apache.ignite.internal.processors.cache.version.GridCacheVersion;
import org.apache.ignite.internal.util.GridLeanMap;
import org.apache.ignite.internal.util.future.GridFinishedFuture;
import org.apache.ignite.internal.util.future.GridFutureAdapter;
import org.apache.ignite.internal.util.tostring.GridToStringInclude;
import org.apache.ignite.internal.util.typedef.C1;
import org.apache.ignite.internal.util.typedef.CI1;
import org.apache.ignite.internal.util.typedef.CIX1;
import org.apache.ignite.internal.util.typedef.F;
import org.apache.ignite.internal.util.typedef.P1;
import org.apache.ignite.internal.util.typedef.internal.CU;
import org.apache.ignite.internal.util.typedef.internal.S;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.apache.ignite.lang.IgniteUuid;
import org.jetbrains.annotations.Nullable;
/**
* Colocated get future.
*/
public class GridPartitionedGetFuture<K, V> extends CacheDistributedGetFutureAdapter<K, V> {
/** */
private static final long serialVersionUID = 0L;
/** Logger reference. */
private static final AtomicReference<IgniteLogger> logRef = new AtomicReference<>();
/** Logger. */
private static IgniteLogger log;
/** Topology version. */
private AffinityTopologyVersion topVer;
/**
* @param cctx Context.
* @param keys Keys.
* @param topVer Topology version.
* @param readThrough Read through flag.
* @param forcePrimary If {@code true} then will force network trip to primary node even
* if called on backup node.
* @param subjId Subject ID.
* @param taskName Task name.
* @param deserializeBinary Deserialize binary flag.
* @param expiryPlc Expiry policy.
* @param skipVals Skip values flag.
* @param canRemap Flag indicating whether future can be remapped on a newer topology version.
* @param needVer If {@code true} returns values as tuples containing value and version.
* @param keepCacheObjects Keep cache objects flag.
*/
public GridPartitionedGetFuture(
GridCacheContext<K, V> cctx,
Collection<KeyCacheObject> keys,
AffinityTopologyVersion topVer,
boolean readThrough,
boolean forcePrimary,
@Nullable UUID subjId,
String taskName,
boolean deserializeBinary,
boolean recovery,
@Nullable IgniteCacheExpiryPolicy expiryPlc,
boolean skipVals,
boolean canRemap,
boolean needVer,
boolean keepCacheObjects
) {
super(cctx,
keys,
readThrough,
forcePrimary,
subjId,
taskName,
deserializeBinary,
expiryPlc,
skipVals,
canRemap,
needVer,
keepCacheObjects,
recovery);
this.topVer = topVer;
if (log == null)
log = U.logger(cctx.kernalContext(), logRef, GridPartitionedGetFuture.class);
}
/**
* Initializes future.
*/
public void init() {
AffinityTopologyVersion lockedTopVer = cctx.shared().lockedTopologyVersion(null);
if (lockedTopVer != null) {
canRemap = false;
map(keys, Collections.<ClusterNode, LinkedHashMap<KeyCacheObject, Boolean>>emptyMap(), lockedTopVer);
}
else {
AffinityTopologyVersion topVer = this.topVer.topologyVersion() > 0 ? this.topVer :
canRemap ? cctx.affinity().affinityTopologyVersion() : cctx.shared().exchange().readyAffinityVersion();
map(keys, Collections.<ClusterNode, LinkedHashMap<KeyCacheObject, Boolean>>emptyMap(), topVer);
}
markInitialized();
}
/** {@inheritDoc} */
@Override public boolean trackable() {
return trackable;
}
/** {@inheritDoc} */
@Override public void markNotTrackable() {
// Should not flip trackable flag from true to false since get future can be remapped.
}
/** {@inheritDoc} */
@Override public IgniteUuid futureId() {
return futId;
}
/** {@inheritDoc} */
@Override public boolean onNodeLeft(UUID nodeId) {
boolean found = false;
for (IgniteInternalFuture<Map<K, V>> fut : futures())
if (isMini(fut)) {
MiniFuture f = (MiniFuture)fut;
if (f.node().id().equals(nodeId)) {
found = true;
f.onNodeLeft(new ClusterTopologyCheckedException("Remote node left grid (will retry): " + nodeId));
}
}
return found;
}
/**
* @param nodeId Sender.
* @param res Result.
*/
@Override public void onResult(UUID nodeId, GridNearGetResponse res) {
for (IgniteInternalFuture<Map<K, V>> fut : futures()) {
if (isMini(fut)) {
MiniFuture f = (MiniFuture)fut;
if (f.futureId().equals(res.miniId())) {
assert f.node().id().equals(nodeId);
f.onResult(res);
}
}
}
}
/** {@inheritDoc} */
@Override public boolean onDone(Map<K, V> res, Throwable err) {
if (super.onDone(res, err)) {
// Don't forget to clean up.
if (trackable)
cctx.mvcc().removeFuture(futId);
cache().sendTtlUpdateRequest(expiryPlc);
return true;
}
return false;
}
/**
* @param f Future.
* @return {@code True} if mini-future.
*/
private boolean isMini(IgniteInternalFuture<?> f) {
return f.getClass().equals(MiniFuture.class);
}
/**
* @param keys Keys.
* @param mapped Mappings to check for duplicates.
* @param topVer Topology version on which keys should be mapped.
*/
private void map(
Collection<KeyCacheObject> keys,
Map<ClusterNode, LinkedHashMap<KeyCacheObject, Boolean>> mapped,
AffinityTopologyVersion topVer
) {
Collection<ClusterNode> cacheNodes = CU.affinityNodes(cctx, topVer);
if (cacheNodes.isEmpty()) {
onDone(new ClusterTopologyServerNotFoundException("Failed to map keys for cache " +
"(all partition nodes left the grid) [topVer=" + topVer + ", cache=" + cctx.name() + ']'));
return;
}
GridDhtTopologyFuture topFut = cctx.shared().exchange().lastFinishedFuture();
Throwable err = topFut != null ? topFut.validateCache(cctx, recovery, true, null, keys) : null;
if (err != null) {
onDone(err);
return;
}
Map<ClusterNode, LinkedHashMap<KeyCacheObject, Boolean>> mappings = U.newHashMap(cacheNodes.size());
final int keysSize = keys.size();
Map<K, V> locVals = U.newHashMap(keysSize);
boolean hasRmtNodes = false;
// Assign keys to primary nodes.
for (KeyCacheObject key : keys)
hasRmtNodes |= map(key, mappings, locVals, topVer, mapped);
if (isDone())
return;
if (!locVals.isEmpty())
add(new GridFinishedFuture<>(locVals));
if (hasRmtNodes) {
if (!trackable) {
trackable = true;
cctx.mvcc().addFuture(this, futId);
}
}
// Create mini futures.
for (Map.Entry<ClusterNode, LinkedHashMap<KeyCacheObject, Boolean>> entry : mappings.entrySet()) {
final ClusterNode n = entry.getKey();
final LinkedHashMap<KeyCacheObject, Boolean> mappedKeys = entry.getValue();
assert !mappedKeys.isEmpty();
// If this is the primary or backup node for the keys.
if (n.isLocal()) {
final GridDhtFuture<Collection<GridCacheEntryInfo>> fut =
cache().getDhtAsync(n.id(),
-1,
mappedKeys,
readThrough,
topVer,
subjId,
taskName == null ? 0 : taskName.hashCode(),
expiryPlc,
skipVals,
recovery);
final Collection<Integer> invalidParts = fut.invalidPartitions();
if (!F.isEmpty(invalidParts)) {
Collection<KeyCacheObject> remapKeys = new ArrayList<>(keysSize);
for (KeyCacheObject key : keys) {
if (key != null && invalidParts.contains(cctx.affinity().partition(key)))
remapKeys.add(key);
}
AffinityTopologyVersion updTopVer = cctx.discovery().topologyVersionEx();
assert updTopVer.compareTo(topVer) > 0 : "Got invalid partitions for local node but topology version did " +
"not change [topVer=" + topVer + ", updTopVer=" + updTopVer +
", invalidParts=" + invalidParts + ']';
// Remap recursively.
map(remapKeys, mappings, updTopVer);
}
// Add new future.
add(fut.chain(new C1<IgniteInternalFuture<Collection<GridCacheEntryInfo>>, Map<K, V>>() {
@Override public Map<K, V> apply(IgniteInternalFuture<Collection<GridCacheEntryInfo>> fut) {
try {
return createResultMap(fut.get());
}
catch (Exception e) {
U.error(log, "Failed to get values from dht cache [fut=" + fut + "]", e);
onDone(e);
return Collections.emptyMap();
}
}
}));
}
else {
MiniFuture fut = new MiniFuture(n, mappedKeys, topVer);
GridCacheMessage req = new GridNearGetRequest(
cctx.cacheId(),
futId,
fut.futureId(),
null,
mappedKeys,
readThrough,
topVer,
subjId,
taskName == null ? 0 : taskName.hashCode(),
expiryPlc != null ? expiryPlc.forCreate() : -1L,
expiryPlc != null ? expiryPlc.forAccess() : -1L,
skipVals,
cctx.deploymentEnabled(),
recovery);
add(fut); // Append new future.
try {
cctx.io().send(n, req, cctx.ioPolicy());
}
catch (IgniteCheckedException e) {
// Fail the whole thing.
if (e instanceof ClusterTopologyCheckedException)
fut.onNodeLeft((ClusterTopologyCheckedException)e);
else
fut.onResult(e);
}
}
}
}
/**
* @param mappings Mappings.
* @param key Key to map.
* @param locVals Local values.
* @param topVer Topology version.
* @param mapped Previously mapped.
* @return {@code True} if has remote nodes.
*/
@SuppressWarnings("ConstantConditions")
private boolean map(
KeyCacheObject key,
Map<ClusterNode, LinkedHashMap<KeyCacheObject, Boolean>> mappings,
Map<K, V> locVals,
AffinityTopologyVersion topVer,
Map<ClusterNode, LinkedHashMap<KeyCacheObject, Boolean>> mapped
) {
int part = cctx.affinity().partition(key);
List<ClusterNode> affNodes = cctx.affinity().nodesByPartition(part, topVer);
if (affNodes.isEmpty()) {
onDone(serverNotFoundError(topVer));
return false;
}
boolean fastLocGet = (!forcePrimary || affNodes.get(0).isLocal()) &&
cctx.allowFastLocalRead(part, affNodes, topVer);
if (fastLocGet && localGet(key, part, locVals))
return false;
ClusterNode node = affinityNode(affNodes);
if (node == null) {
onDone(serverNotFoundError(topVer));
return false;
}
boolean remote = !node.isLocal();
LinkedHashMap<KeyCacheObject, Boolean> keys = mapped.get(node);
if (keys != null && keys.containsKey(key)) {
if (REMAP_CNT_UPD.incrementAndGet(this) > MAX_REMAP_CNT) {
onDone(new ClusterTopologyCheckedException("Failed to remap key to a new node after " +
MAX_REMAP_CNT + " attempts (key got remapped to the same node) [key=" + key + ", node=" +
U.toShortString(node) + ", mappings=" + mapped + ']'));
return false;
}
}
LinkedHashMap<KeyCacheObject, Boolean> old = mappings.get(node);
if (old == null)
mappings.put(node, old = new LinkedHashMap<>(3, 1f));
old.put(key, false);
return remote;
}
/**
* @param key Key.
* @param part Partition.
* @param locVals Local values.
* @return {@code True} if there is no need to further search value.
*/
private boolean localGet(KeyCacheObject key, int part, Map<K, V> locVals) {
assert cctx.affinityNode() : this;
GridDhtCacheAdapter<K, V> cache = cache();
boolean readNoEntry = cctx.readNoEntry(expiryPlc, false);
boolean evt = !skipVals;
while (true) {
try {
boolean skipEntry = readNoEntry;
EntryGetResult getRes = null;
CacheObject v = null;
GridCacheVersion ver = null;
if (readNoEntry) {
CacheDataRow row = cctx.offheap().read(key);
if (row != null) {
long expireTime = row.expireTime();
if (expireTime == 0 || expireTime > U.currentTimeMillis()) {
v = row.value();
if (needVer)
ver = row.version();
if (evt) {
cctx.events().readEvent(key,
null,
row.value(),
subjId,
taskName,
!deserializeBinary);
}
}
else
skipEntry = false;
}
}
if (!skipEntry) {
GridCacheEntryEx entry = cache.entryEx(key);
// If our DHT cache do has value, then we peek it.
if (entry != null) {
boolean isNew = entry.isNewLocked();
if (needVer) {
getRes = entry.innerGetVersioned(
null,
null,
/*update-metrics*/false,
/*event*/evt,
subjId,
null,
taskName,
expiryPlc,
!deserializeBinary,
null);
if (getRes != null) {
v = getRes.value();
ver = getRes.version();
}
}
else {
v = entry.innerGet(
null,
null,
/*read-through*/false,
/*update-metrics*/false,
/*event*/evt,
subjId,
null,
taskName,
expiryPlc,
!deserializeBinary);
}
cache.context().evicts().touch(entry, topVer);
// Entry was not in memory or in swap, so we remove it from cache.
if (v == null) {
if (isNew && entry.markObsoleteIfEmpty(ver))
cache.removeEntry(entry);
}
}
}
if (v != null) {
cctx.addResult(locVals,
key,
v,
skipVals,
keepCacheObjects,
deserializeBinary,
true,
getRes,
ver,
0,
0,
needVer);
return true;
}
boolean topStable = cctx.isReplicated() || topVer.equals(cctx.topology().topologyVersion());
// Entry not found, do not continue search if topology did not change and there is no store.
if (!cctx.readThroughConfigured() && (topStable || partitionOwned(part))) {
if (!skipVals && cctx.config().isStatisticsEnabled())
cache.metrics0().onRead(false);
return true;
}
return false;
}
catch (GridCacheEntryRemovedException ignored) {
// No-op, will retry.
}
catch (GridDhtInvalidPartitionException ignored) {
return false;
}
catch (IgniteCheckedException e) {
onDone(e);
return true;
}
}
}
/**
* @return Near cache.
*/
private GridDhtCacheAdapter<K, V> cache() {
return cctx.dht();
}
/**
* @param infos Entry infos.
* @return Result map.
*/
private Map<K, V> createResultMap(Collection<GridCacheEntryInfo> infos) {
int keysSize = infos.size();
if (keysSize != 0) {
Map<K, V> map = new GridLeanMap<>(keysSize);
for (GridCacheEntryInfo info : infos) {
assert skipVals == (info.value() == null);
cctx.addResult(map,
info.key(),
info.value(),
skipVals,
keepCacheObjects,
deserializeBinary,
false,
needVer ? info.version() : null,
0,
0);
}
return map;
}
return Collections.emptyMap();
}
/** {@inheritDoc} */
@Override public String toString() {
Collection<String> futs = F.viewReadOnly(futures(), new C1<IgniteInternalFuture<?>, String>() {
@SuppressWarnings("unchecked")
@Override public String apply(IgniteInternalFuture<?> f) {
if (isMini(f)) {
return "[node=" + ((MiniFuture)f).node().id() +
", loc=" + ((MiniFuture)f).node().isLocal() +
", done=" + f.isDone() + "]";
}
else
return "[loc=true, done=" + f.isDone() + "]";
}
});
return S.toString(GridPartitionedGetFuture.class, this,
"innerFuts", futs,
"super", super.toString());
}
/**
* Mini-future for get operations. Mini-futures are only waiting on a single
* node as opposed to multiple nodes.
*/
private class MiniFuture extends GridFutureAdapter<Map<K, V>> {
/** */
private final IgniteUuid futId = IgniteUuid.randomUuid();
/** Node ID. */
private final ClusterNode node;
/** Keys. */
@GridToStringInclude
private final LinkedHashMap<KeyCacheObject, Boolean> keys;
/** Topology version on which this future was mapped. */
private final AffinityTopologyVersion topVer;
/** {@code True} if remapped after node left. */
private boolean remapped;
/**
* @param node Node.
* @param keys Keys.
* @param topVer Topology version.
*/
MiniFuture(ClusterNode node, LinkedHashMap<KeyCacheObject, Boolean> keys, AffinityTopologyVersion topVer) {
this.node = node;
this.keys = keys;
this.topVer = topVer;
}
/**
* @return Future ID.
*/
IgniteUuid futureId() {
return futId;
}
/**
* @return Node ID.
*/
public ClusterNode node() {
return node;
}
/**
* @return Keys.
*/
public Collection<KeyCacheObject> keys() {
return keys.keySet();
}
/**
* @param e Error.
*/
void onResult(Throwable e) {
if (log.isDebugEnabled())
log.debug("Failed to get future result [fut=" + this + ", err=" + e + ']');
// Fail.
onDone(e);
}
/**
* @param e Failure exception.
*/
@SuppressWarnings("UnusedParameters")
synchronized void onNodeLeft(ClusterTopologyCheckedException e) {
if (remapped)
return;
remapped = true;
if (log.isDebugEnabled())
log.debug("Remote node left grid while sending or waiting for reply (will retry): " + this);
// Try getting from existing nodes.
if (!canRemap) {
map(keys.keySet(), F.t(node, keys), topVer);
onDone(Collections.<K, V>emptyMap());
}
else {
final AffinityTopologyVersion updTopVer =
new AffinityTopologyVersion(Math.max(topVer.topologyVersion() + 1, cctx.discovery().topologyVersion()));
cctx.affinity().affinityReadyFuture(updTopVer).listen(
new CI1<IgniteInternalFuture<AffinityTopologyVersion>>() {
@Override public void apply(IgniteInternalFuture<AffinityTopologyVersion> fut) {
try {
fut.get();
// Remap.
map(keys.keySet(), F.t(node, keys), updTopVer);
onDone(Collections.<K, V>emptyMap());
}
catch (IgniteCheckedException e) {
GridPartitionedGetFuture.this.onDone(e);
}
}
}
);
}
}
/**
* @param res Result callback.
*/
@SuppressWarnings("ThrowableResultOfMethodCallIgnored")
void onResult(final GridNearGetResponse res) {
final Collection<Integer> invalidParts = res.invalidPartitions();
// If error happened on remote node, fail the whole future.
if (res.error() != null) {
onDone(res.error());
return;
}
// Remap invalid partitions.
if (!F.isEmpty(invalidParts)) {
AffinityTopologyVersion rmtTopVer = res.topologyVersion();
assert !rmtTopVer.equals(AffinityTopologyVersion.ZERO);
if (rmtTopVer.compareTo(topVer) <= 0) {
// Fail the whole get future.
onDone(new IgniteCheckedException("Failed to process invalid partitions response (remote node reported " +
"invalid partitions but remote topology version does not differ from local) " +
"[topVer=" + topVer + ", rmtTopVer=" + rmtTopVer + ", invalidParts=" + invalidParts +
", nodeId=" + node.id() + ']'));
return;
}
if (log.isDebugEnabled())
log.debug("Remapping mini get future [invalidParts=" + invalidParts + ", fut=" + this + ']');
if (!canRemap) {
map(F.view(keys.keySet(), new P1<KeyCacheObject>() {
@Override public boolean apply(KeyCacheObject key) {
return invalidParts.contains(cctx.affinity().partition(key));
}
}), F.t(node, keys), topVer);
onDone(createResultMap(res.entries()));
return;
}
// Need to wait for next topology version to remap.
IgniteInternalFuture<AffinityTopologyVersion> topFut = cctx.affinity().affinityReadyFuture(rmtTopVer);
topFut.listen(new CIX1<IgniteInternalFuture<AffinityTopologyVersion>>() {
@SuppressWarnings("unchecked")
@Override public void applyx(IgniteInternalFuture<AffinityTopologyVersion> fut) throws IgniteCheckedException {
AffinityTopologyVersion topVer = fut.get();
// This will append new futures to compound list.
map(F.view(keys.keySet(), new P1<KeyCacheObject>() {
@Override public boolean apply(KeyCacheObject key) {
return invalidParts.contains(cctx.affinity().partition(key));
}
}), F.t(node, keys), topVer);
onDone(createResultMap(res.entries()));
}
});
}
else {
try {
onDone(createResultMap(res.entries()));
}
catch (Exception e) {
onDone(e);
}
}
}
/** {@inheritDoc} */
@Override public String toString() {
return S.toString(MiniFuture.class, this);
}
}
}