/*
* Copyright 2013 Jive Software, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.jivesoftware.os.amza.service;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Iterables;
import com.google.common.collect.SetMultimap;
import com.jivesoftware.os.amza.api.TimestampedValue;
import com.jivesoftware.os.amza.api.ring.RingHost;
import com.jivesoftware.os.amza.api.ring.RingMember;
import com.jivesoftware.os.amza.api.ring.RingMemberAndHost;
import com.jivesoftware.os.amza.api.scan.RowChanges;
import com.jivesoftware.os.amza.api.scan.RowsChanged;
import com.jivesoftware.os.amza.api.wal.WALKey;
import com.jivesoftware.os.amza.api.wal.WALUpdated;
import com.jivesoftware.os.amza.service.ring.AmzaRingReader;
import com.jivesoftware.os.amza.service.ring.AmzaRingWriter;
import com.jivesoftware.os.amza.service.ring.CacheId;
import com.jivesoftware.os.amza.service.ring.RingSet;
import com.jivesoftware.os.amza.service.ring.RingTopology;
import com.jivesoftware.os.amza.service.storage.PartitionCreator;
import com.jivesoftware.os.amza.service.storage.SystemWALStorage;
import com.jivesoftware.os.jive.utils.collections.bah.ConcurrentBAHash;
import com.jivesoftware.os.jive.utils.ordered.id.TimestampedOrderIdProvider;
import com.jivesoftware.os.mlogger.core.MetricLogger;
import com.jivesoftware.os.mlogger.core.MetricLoggerFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Writes node registrations and ring membership into the system WAL partitions
 * ({@link PartitionCreator#NODE_INDEX} and {@link PartitionCreator#RING_INDEX}) and, as a
 * {@link RowChanges} listener, advances the shared cache ids whenever rows in those
 * partitions change.
 *
 * <p>Reads are delegated to the supplied {@link AmzaRingStoreReader}; the cache maps and the
 * node cache id counter are supplied by the caller and mutated in place by this class.
 */
public class AmzaRingStoreWriter implements AmzaRingWriter, RowChanges {

    private static final MetricLogger LOG = MetricLoggerFactory.getLogger();

    private final AmzaRingStoreReader ringStoreReader;
    private final SystemWALStorage systemWALStorage;
    private final TimestampedOrderIdProvider orderIdProvider;
    private final WALUpdated walUpdated;
    private final ConcurrentBAHash<CacheId<RingTopology>> ringsCache;
    private final ConcurrentBAHash<CacheId<RingSet>> ringMemberRingNamesCache;
    private final AtomicLong nodeCacheId;
    private final boolean rackDistributionEnabled;

    /**
     * @param ringStoreReader          reader used for key encoding/decoding and ring lookups
     * @param systemWALStorage         storage that receives every node and ring row written here
     * @param orderIdProvider          source of the ids used as row timestamps and versions
     * @param walUpdated               callback handed to every {@code systemWALStorage.update} call
     * @param ringsCache               per-ring-name cache entries; advanced or removed by this class
     * @param ringMemberRingNamesCache per-ring-member cache entries; advanced by this class
     * @param nodeCacheId              counter incremented when node index rows are applied
     * @param rackDistributionEnabled  when {@code true}, sub-ring members are grouped by the rack
     *                                 reported by their host; when {@code false} all members are
     *                                 treated as one group
     */
    public AmzaRingStoreWriter(AmzaRingStoreReader ringStoreReader,
        SystemWALStorage systemWALStorage,
        TimestampedOrderIdProvider orderIdProvider,
        WALUpdated walUpdated,
        ConcurrentBAHash<CacheId<RingTopology>> ringsCache,
        ConcurrentBAHash<CacheId<RingSet>> ringMemberRingNamesCache,
        AtomicLong nodeCacheId,
        boolean rackDistributionEnabled) {
        this.ringStoreReader = ringStoreReader;
        this.systemWALStorage = systemWALStorage;
        this.orderIdProvider = orderIdProvider;
        this.walUpdated = walUpdated;
        this.ringsCache = ringsCache;
        this.ringMemberRingNamesCache = ringMemberRingNamesCache;
        this.nodeCacheId = nodeCacheId;
        this.rackDistributionEnabled = rackDistributionEnabled;
    }

    /**
     * Advances cache ids in response to applied row changes.
     *
     * <p>For ring index changes, every applied key advances both the cache entry of the ring
     * named by the key and the cache entry of the ring member encoded in the key. For node index
     * changes, the node cache id is incremented once if at least one row was applied. Changes to
     * any other partition are ignored.
     *
     * @param changes the applied changes and the partition they belong to
     * @throws Exception if decoding a key through the ring store reader fails
     */
    @Override
    public void changes(RowsChanged changes) throws Exception {
        if (PartitionCreator.RING_INDEX.equals(changes.getVersionedPartitionName())) {
            for (WALKey walKey : changes.getApply().keySet()) {
                byte[] ringBytes = ringStoreReader.keyToRingName(walKey);
                ringsCache.compute(ringBytes, (key, existing) -> advance(existing));

                RingMember ringMember = ringStoreReader.keyToRingMember(walKey.key);
                ringMemberRingNamesCache.compute(ringMember.leakBytes(), (key, existing) -> advance(existing));
            }
        } else if (PartitionCreator.NODE_INDEX.equals(changes.getVersionedPartitionName())) {
            if (!changes.getApply().isEmpty()) {
                nodeCacheId.incrementAndGet();
            }
        }
    }

    /**
     * Increments the cache id of the given entry, creating an entry with a {@code null} payload
     * first when none exists yet.
     */
    private static <T> CacheId<T> advance(CacheId<T> cacheId) {
        CacheId<T> advanced = (cacheId == null) ? new CacheId<>(null) : cacheId;
        advanced.currentCacheId++;
        return advanced;
    }

    /**
     * Records {@code ringHost} as the host of {@code ringMember} in the node index and makes sure
     * the member belongs to the system ring.
     *
     * <p>The node row is written when {@code force} is {@code true}, when no host is currently
     * stored for the member, or when the stored host differs from {@code ringHost}. The member
     * is added to the system ring in every case.
     *
     * @param ringMember  the member to register
     * @param ringHost    the host to associate with the member
     * @param timestampId timestamp for the node row, or {@code -1} to use a freshly generated id
     * @param force       when {@code true}, skip the lookup of the stored host and always write
     * @throws Exception if reading or writing the system WAL fails
     */
    @Override
    public void register(RingMember ringMember, RingHost ringHost, long timestampId, boolean force) throws Exception {
        TimestampedValue registeredHost = force
            ? null
            : systemWALStorage.getTimestampedValue(PartitionCreator.NODE_INDEX, null, ringMember.toBytes());

        boolean hostChanged = registeredHost == null || !ringHost.equals(RingHost.fromBytes(registeredHost.getValue()));
        if (hostChanged) {
            long version = orderIdProvider.nextId();
            long timestamp = (timestampId == -1) ? version : timestampId;
            systemWALStorage.update(PartitionCreator.NODE_INDEX, null,
                (highwater, scan) -> scan.row(-1, ringMember.toBytes(), ringHost.toBytes(), timestamp, false, version),
                walUpdated);
            LOG.info("register ringMember:{} as ringHost:{}", ringMember, ringHost);
        }
        addRingMember(AmzaRingReader.SYSTEM_RING, ringMember);
    }

    /**
     * Removes {@code ringMember} from the system ring and then writes a node index row for it
     * with a {@code null} value and the boolean row flag set (presumably the tombstone marker --
     * confirm against the scan contract).
     *
     * @param ringMember the member to deregister
     * @throws Exception if reading or writing the system WAL fails
     */
    @Override
    public void deregister(RingMember ringMember) throws Exception {
        removeRingMember(AmzaRingReader.SYSTEM_RING, ringMember);

        long timestampAndVersion = orderIdProvider.nextId();
        systemWALStorage.update(PartitionCreator.NODE_INDEX, null,
            (highwater, scan) -> scan.row(-1, ringMember.toBytes(), null, timestampAndVersion, true, timestampAndVersion),
            walUpdated);
        // The placeholder previously had no argument, so the member was never logged.
        LOG.info("deregister ringMember:{}", ringMember);
    }

    /**
     * Delegates to {@link AmzaRingStoreReader#isMemberOfRing(byte[], long)}.
     *
     * @param ringName        name of the ring to check
     * @param timeoutInMillis timeout passed through to the reader
     * @return the reader's answer
     * @throws Exception if the reader fails
     */
    public boolean isMemberOfRing(byte[] ringName, long timeoutInMillis) throws Exception {
        return ringStoreReader.isMemberOfRing(ringName, timeoutInMillis);
    }

    /**
     * Ensures the named ring is at least as large as the system ring currently is.
     *
     * @param ringName        name of the ring to grow; must not be {@code null}
     * @param timeoutInMillis timeout passed through to the ring size lookups
     * @throws Exception if a lookup or the resulting ring update fails
     */
    @Override
    public void ensureMaximalRing(byte[] ringName, long timeoutInMillis) throws Exception {
        int systemRingSize = ringStoreReader.getRingSize(AmzaRingReader.SYSTEM_RING, timeoutInMillis);
        ensureSubRing(ringName, systemRingSize, timeoutInMillis);
    }

    /**
     * Grows the named ring to {@code desiredRingSize} members when it is currently smaller.
     * A ring that already has at least that many members is left untouched.
     *
     * @param ringName        name of the ring to grow
     * @param desiredRingSize minimum number of members the ring should have
     * @param timeoutInMillis timeout passed through to the ring size lookup
     * @throws IllegalArgumentException if {@code ringName} is {@code null}
     * @throws IllegalStateException    if the ring must grow and the system ring has fewer than
     *                                  {@code desiredRingSize} members
     * @throws Exception                if a lookup or the resulting ring update fails
     */
    @Override
    public void ensureSubRing(byte[] ringName, int desiredRingSize, long timeoutInMillis) throws Exception {
        if (ringName == null) {
            throw new IllegalArgumentException("ringName cannot be null.");
        }
        int ringSize = ringStoreReader.getRingSize(ringName, timeoutInMillis);
        if (ringSize < desiredRingSize) {
            // Log the name the same way setInternal does instead of handing the logger a raw byte[].
            LOG.info("Ring {} will grow, has {} desires {}", new String(ringName), ringSize, desiredRingSize);
            buildRandomSubRing(ringName, desiredRingSize);
        }
    }

    /**
     * Extends the named ring with members drawn from the system ring until it holds
     * {@code desiredRingSize} members or no candidates remain, then writes the result.
     *
     * <p>Members are grouped by rack (see {@link #rackOf(RingMemberAndHost)}). Each round picks
     * from the rack that currently contributes the fewest members to the sub-ring and still has
     * candidates left. Candidate lists are shuffled with a {@link Random} seeded from the ring
     * name's bytes, so the shuffle depends only on the ring name and the order in which the
     * system ring lists its entries.
     *
     * <p>The caller is expected to have validated {@code ringName}.
     *
     * @throws IllegalStateException if the system ring has fewer than {@code desiredRingSize} entries
     */
    private void buildRandomSubRing(byte[] ringName, int desiredRingSize) throws Exception {
        RingTopology systemRing = ringStoreReader.getRing(AmzaRingReader.SYSTEM_RING, 0);
        if (systemRing.entries.size() < desiredRingSize) {
            throw new IllegalStateException("Current 'system' ring is not large enough to support a ring of size:" + desiredRingSize);
        }

        // Members the sub-ring already has, grouped by rack.
        SetMultimap<String, RingMemberAndHost> subRackMembers = HashMultimap.create();
        RingTopology subRing = ringStoreReader.getRing(ringName, 0);
        for (RingMemberAndHost entry : subRing.entries) {
            subRackMembers.put(rackOf(entry), entry);
        }

        // Every system ring member is a candidate, grouped by rack.
        Map<String, List<RingMemberAndHost>> systemRackMembers = new HashMap<>();
        for (RingMemberAndHost entry : systemRing.entries) {
            systemRackMembers.computeIfAbsent(rackOf(entry), (key) -> new ArrayList<>()).add(entry);
        }

        Random random = new Random(new Random(Arrays.hashCode(ringName)).nextLong());
        for (List<RingMemberAndHost> rackMembers : systemRackMembers.values()) {
            Collections.shuffle(rackMembers, random);
        }

        List<String> racks = new ArrayList<>(systemRackMembers.keySet());
        while (subRackMembers.size() < desiredRingSize) {
            // Least-represented rack first, re-evaluated every round.
            racks.sort((o1, o2) -> Integer.compare(subRackMembers.get(o1).size(), subRackMembers.get(o2).size()));

            boolean advanced = false;
            for (String cycleRack : racks) {
                List<RingMemberAndHost> candidates = systemRackMembers.get(cycleRack);
                if (!candidates.isEmpty()) {
                    // A candidate is consumed even if the multimap already holds an equal entry
                    // under this rack; the loop then simply moves on to the next candidate.
                    subRackMembers.put(cycleRack, candidates.remove(candidates.size() - 1));
                    advanced = true;
                    break;
                }
            }
            if (!advanced) {
                // Every rack is out of candidates.
                break;
            }
        }

        setInternal(ringName, Iterables.transform(subRackMembers.values(), input -> input.ringMember));
    }

    /**
     * Returns the rack label used to group {@code entry}: the rack reported by its host when rack
     * distribution is enabled, otherwise the empty string so that all members share one group.
     */
    private String rackOf(RingMemberAndHost entry) {
        return rackDistributionEnabled ? entry.ringHost.getRack() : "";
    }

    /**
     * Adds {@code ringMember} to the named ring unless a ring index value is already stored for
     * that ring/member key. When adding, the ring's current members plus the new member are
     * rewritten together in one update.
     *
     * @param ringName   name of the ring; must not be {@code null}
     * @param ringMember member to add; must not be {@code null}
     * @throws NullPointerException if either argument is {@code null}
     * @throws Exception            if reading or writing the system WAL fails
     */
    @Override
    public void addRingMember(byte[] ringName, RingMember ringMember) throws Exception {
        Preconditions.checkNotNull(ringName, "ringName cannot be null.");
        Preconditions.checkNotNull(ringMember, "ringMember cannot be null.");

        byte[] key = ringStoreReader.key(ringName, ringMember);
        TimestampedValue had = systemWALStorage.getTimestampedValue(PartitionCreator.RING_INDEX, null, key);
        if (had == null) {
            RingTopology ring = ringStoreReader.getRing(ringName, -1);
            Iterable<RingMember> currentMembers = Iterables.transform(ring.entries, input -> input.ringMember);
            setInternal(ringName, Iterables.concat(currentMembers, Collections.singleton(ringMember)));
        }
    }

    /**
     * Writes one ring index row per member in a single update, all sharing one freshly generated
     * timestamp/version, then drops the ring's entry from the rings cache.
     *
     * <p>We deliberately do a slab update of rings to ensure "all at once" ring visibility.
     */
    private void setInternal(byte[] ringName, Iterable<RingMember> members) throws Exception {
        systemWALStorage.update(PartitionCreator.RING_INDEX, null,
            (highwater, scan) -> {
                long timestampAndVersion = orderIdProvider.nextId();
                for (RingMember member : members) {
                    byte[] memberKey = ringStoreReader.key(ringName, member);
                    if (!scan.row(-1, memberKey, new byte[0], timestampAndVersion, false, timestampAndVersion)) {
                        return false;
                    }
                }
                return true;
            }, walUpdated);
        ringsCache.remove(ringName);
        LOG.info("Ring update:{} -> {}", new String(ringName), members);
    }

    /**
     * Removes {@code ringMember} from the named ring when a ring index value is stored for that
     * ring/member key, by writing a row with a {@code null} value and the boolean row flag set
     * (presumably the tombstone marker -- confirm against the scan contract). Does nothing when
     * no value is stored.
     *
     * @param ringName   name of the ring; must not be {@code null}
     * @param ringMember member to remove; must not be {@code null}
     * @throws NullPointerException if either argument is {@code null}
     * @throws Exception            if reading or writing the system WAL fails
     */
    @Override
    public void removeRingMember(byte[] ringName, RingMember ringMember) throws Exception {
        Preconditions.checkNotNull(ringName, "ringName cannot be null.");
        Preconditions.checkNotNull(ringMember, "ringMember cannot be null.");

        byte[] key = ringStoreReader.key(ringName, ringMember);
        TimestampedValue had = systemWALStorage.getTimestampedValue(PartitionCreator.RING_INDEX, null, key);
        if (had != null) {
            long timestampAndVersion = orderIdProvider.nextId();
            systemWALStorage.update(PartitionCreator.RING_INDEX, null,
                (highwater, scan) -> scan.row(-1, key, null, timestampAndVersion, true, timestampAndVersion),
                walUpdated);
        }
    }
}