package org.infinispan.server.hotrod;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import org.infinispan.AdvancedCache;
import org.infinispan.commons.hash.Hash;
import org.infinispan.configuration.cache.Configuration;
import org.infinispan.distribution.DistributionManager;
import org.infinispan.distribution.ch.ConsistentHash;
import org.infinispan.distribution.ch.KeyPartitioner;
import org.infinispan.distribution.ch.impl.HashFunctionPartitioner;
import org.infinispan.distribution.group.impl.GroupingPartitioner;
import org.infinispan.distribution.group.impl.PartitionerConsistentHash;
import org.infinispan.remoting.transport.Address;
import org.infinispan.server.hotrod.transport.ExtendedByteBuf;
import org.infinispan.util.KeyValuePair;

import io.netty.buffer.ByteBuf;

/**
* Hot Rod encoder for protocol version 1.1
*
* @author Galder Zamarreño
* @since 5.2
*/
public abstract class AbstractTopologyAwareEncoder1x extends AbstractEncoder1x {

@Override
protected AbstractHashDistAwareResponse createHashDistAwareResp(int topologyId,
Map<Address, ServerAddress> serverEndpointsMap, Configuration cfg) {
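// Advertise the 1.x hash version, the full positive int range as the hash space, and a single
// virtual node per owner, matching what writeHashTopologyUpdate11 writes below.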
return new HashDistAware11Response(topologyId, serverEndpointsMap, cfg.clustering().hash().numOwners(),
Constants.DEFAULT_CONSISTENT_HASH_VERSION_1x, Integer.MAX_VALUE, 1);
}

@Override
void writeHashTopologyUpdate(AbstractHashDistAwareResponse h, HotRodServer server, Response r, ByteBuf buffer) {
if (h instanceof HashDistAware11Response) {
writeHashTopologyUpdate11((HashDistAware11Response) h, server, r, buffer);
} else {
throw new IllegalStateException(
"Expected version 1.1 specific response: " + h);
}
}

void writeHashTopologyUpdate11(HashDistAware11Response h, HotRodServer server, Response r, ByteBuf buf) {
log.tracef("Write hash distribution change response header %s", h);
if (h.hashFunction == 0) {
writeLimitedHashTopologyUpdate(h, buf);
return;
}
AdvancedCache<byte[], byte[]> cache = server.getCacheInstance(r.cacheName, server.getCacheManager(), false, true);
// This is not quite correct, as the ownership of segments on the 1.0/1.1 clients is not exactly
// the same as on the server. But the difference appears only for (numSegments*numOwners/MAX_INT)
// of the keys (at the "segment borders"), so it's still much better than having no hash information.
// The idea here is to remain compatible with clients running version 1.0 of the protocol.
// Over time, users should migrate to clients capable of protocol version 1.2.
DistributionManager distManager = cache.getDistributionManager();
ConsistentHash ch = distManager.getReadConsistentHash();
int numSegments = ch.getNumSegments();
// Collect all the hash ids in a collection so we can write the correct size.
// There will be more than one hash id for each server, so we can't use a map.
List<KeyValuePair<ServerAddress, Integer>> hashIds = new ArrayList<>(numSegments);
List<Integer>[] allDenormalizedHashIds = denormalizeSegmentHashIds(ch);
for (int segmentIdx = 0; segmentIdx < numSegments; ++segmentIdx) {
List<Integer> denormalizedSegmentHashIds = allDenormalizedHashIds[segmentIdx];
List<Address> segmentOwners = ch.locateOwnersForSegment(segmentIdx);
for (int ownerIdx = 0; ownerIdx < segmentOwners.size(); ++ownerIdx) {
Address address = segmentOwners.get(ownerIdx);
ServerAddress serverAddress = h.serverEndpointsMap.get(address);
if (serverAddress != null) {
Integer hashId = denormalizedSegmentHashIds.get(ownerIdx);
hashIds.add(new KeyValuePair<>(serverAddress, hashId));
} else {
log.tracef("Could not find member %s in the address cache", address);
}
}
}
writeCommonHashTopologyHeader(buf, h.topologyId, h.numOwners,
h.hashFunction, h.hashSpace, hashIds.size());
ExtendedByteBuf.writeUnsignedInt(1, buf); // Num virtual nodes
for (KeyValuePair<ServerAddress, Integer> serverHash : hashIds) {
// TODO: check why the cast to Object is needed here
log.tracef("Writing hash id %d for %s:%s", (Object) serverHash.getValue(), serverHash.getKey().getHost(),
serverHash.getKey().getPort());
ExtendedByteBuf.writeString(serverHash.getKey().getHost(), buf);
ExtendedByteBuf.writeUnsignedShort(serverHash.getKey().getPort(), buf);
buf.writeInt(serverHash.getValue());
}
}

@Override
void writeLimitedHashTopologyUpdate(AbstractTopologyResponse t, ByteBuf buffer) {
log.tracef("Return limited hash distribution aware header in spite of having a hash aware client %s", t);
writeCommonHashTopologyHeader(buffer, t.topologyId, 0, (byte) 0, 0, t.serverEndpointsMap.size());
ExtendedByteBuf.writeUnsignedInt(1, buffer); // Num virtual nodes
for (ServerAddress address : t.serverEndpointsMap.values()) {
ExtendedByteBuf.writeString(address.getHost(), buffer);
ExtendedByteBuf.writeUnsignedShort(address.getPort(), buffer);
buffer.writeInt(0); // Address' hash id
}
}

// "Denormalize" the segments - for each hash segment, find numOwners integer values that map on the
// hash wheel to the interval [(segmentIdx+1)*segmentSize, (segmentIdx+1)*segmentSize+leeway) (mod the
// wheel size), leeway being derived from the leewayFraction hardcoded on the first line of the function.
// TODO This relies on implementation details (segment layout) of DefaultConsistentHash, and won't work with any other CH
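// As an illustration (hypothetical numbers, assuming a 1.x client routes a key to the first node
// at or clockwise after the key's hash): with numSegments = 2, segmentSize is ~2^30, so an owner of
// segment 0 is advertised with a hash id that the client hashes to just above 2^30, and an owner of
// segment 1 with one hashing to just above 0 (wrapping around the wheel).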
List<Integer>[] denormalizeSegmentHashIds(ConsistentHash ch) {
// This is the fraction of keys we allow to have "wrong" owners. The algorithm below takes longer
// as this value decreases, and below a certain point it would never terminate (the assert below
// guards against that).
double leewayFraction = 0.0002;
int numOwners = ch.getNumOwners();
int numSegments = ch.getNumSegments();
int segmentSize = (int) Math.ceil((double) Integer.MAX_VALUE / numSegments);
int leeway = (int) (leewayFraction * segmentSize);
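// Example with illustrative numbers: for 256 segments, segmentSize = ceil(Integer.MAX_VALUE / 256.0)
// = 8,388,608 and leeway = 1,677, comfortably above 2 * numOwners for typical owner counts.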
assert (leeway > 2 * numOwners);
Map<Integer, Integer>[] ownerHashes = new Map[numSegments];
for (int i = 0; i < numSegments; ++i) {
ownerHashes[i] = new HashMap<>();
}
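// Each map collects candidate wheel positions (normalized hashes) for one segment, keyed to the
// input value that hashes there; the inputs are what we advertise, since 1.x clients apply the
// hash function to the received hash id to place the node on the wheel.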
KeyPartitioner keyPartitioner = ((PartitionerConsistentHash) ch).getKeyPartitioner();
extractHash(keyPartitioner).ifPresent(h -> {
int i = 0;
int segmentsLeft = numSegments;
while (segmentsLeft != 0) {
int normalizedHash = h.hash(i) & Integer.MAX_VALUE;
if (normalizedHash % segmentSize < leeway) {
int nextSegmentIdx = normalizedHash / segmentSize;
int segmentIdx = (nextSegmentIdx - 1 + numSegments) % numSegments;
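// The candidate hash lands at the start of segment nextSegmentIdx; since a node on the 1.x
// client's wheel owns the arc *before* its position, record it against the previous segment.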
Map<Integer, Integer> segmentHashes = ownerHashes[segmentIdx];
if (segmentHashes.size() < numOwners) {
segmentHashes.put(normalizedHash, i);
if (segmentHashes.size() == numOwners) {
segmentsLeft -= 1;
}
}
}
// Allows overflow, if we didn't find all segments in the 0..MAX_VALUE range
i += 1;
}
});
log.tracef("Found denormalized hashes: %s", ownerHashes);
List<Integer>[] results = new List[ownerHashes.length];
// Sort each segment's hashes by the normalized hash, then keep only the denormalized values
int i = 0;
for (Map<Integer, Integer> ownerHash : ownerHashes) {
results[i++] = ownerHash.entrySet().stream()
.sorted(Comparator.comparing(Map.Entry::getKey))
.map(Map.Entry::getValue)
.collect(Collectors.toList());
}
return results;
}

Optional<Hash> extractHash(KeyPartitioner keyPartitioner) {
if (keyPartitioner instanceof HashFunctionPartitioner) {
return Optional.of(((HashFunctionPartitioner) keyPartitioner).getHash());
} else if (keyPartitioner instanceof GroupingPartitioner) {
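// A GroupingPartitioner wraps another partitioner; look for the hash function in its delegate.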
return extractHash(((GroupingPartitioner) keyPartitioner).unwrap());
} else {
return Optional.empty();
}
}
}