package com.jivesoftware.os.amza.service.replication;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.jivesoftware.os.amza.api.AmzaInterner;
import com.jivesoftware.os.amza.api.partition.PartitionName;
import com.jivesoftware.os.amza.api.partition.VersionedAquarium;
import com.jivesoftware.os.amza.api.partition.VersionedPartitionName;
import com.jivesoftware.os.amza.api.scan.RowChanges;
import com.jivesoftware.os.amza.api.scan.RowsChanged;
import com.jivesoftware.os.amza.api.wal.WALKey;
import com.jivesoftware.os.amza.service.AmzaRingStoreReader;
import com.jivesoftware.os.amza.service.NotARingMemberException;
import com.jivesoftware.os.amza.service.PartitionIsDisposedException;
import com.jivesoftware.os.amza.service.StripingLocksProvider;
import com.jivesoftware.os.amza.service.stats.AmzaStats;
import com.jivesoftware.os.amza.service.stats.AmzaStats.CompactionFamily;
import com.jivesoftware.os.amza.service.stats.AmzaStats.CompactionStats;
import com.jivesoftware.os.amza.service.storage.PartitionCreator;
import com.jivesoftware.os.amza.service.storage.PartitionIndex;
import com.jivesoftware.os.aquarium.State;
import com.jivesoftware.os.jive.utils.collections.bah.ConcurrentBAHash;
import com.jivesoftware.os.mlogger.core.MetricLogger;
import com.jivesoftware.os.mlogger.core.MetricLoggerFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import static com.jivesoftware.os.amza.service.storage.PartitionCreator.AQUARIUM_STATE_INDEX;
import static com.jivesoftware.os.amza.service.storage.PartitionCreator.PARTITION_VERSION_INDEX;
import static com.jivesoftware.os.amza.service.storage.PartitionCreator.REGION_INDEX;
/**
 * Periodically removes ("composts") locally stored partitions that should no longer exist on this node.
 * <p>
 * A partition version is deleted when its aquarium state is {@link State#expunged}, when it is no longer the
 * current storage version, or when this node is not a member of the partition's ring / has no definition for
 * the partition and the partition is still in {@link State#bootstrap} (or has no state). In the latter two
 * cases a partition in any other state is only <em>suggested</em> to move to {@link State#expunged}.
 * <p>
 * The composter also listens to row changes on the region index, the partition version index and the aquarium
 * state index, and remembers the affected partitions as "dirty" so that subsequent passes only need to look at
 * those partitions. The very first pass (cold start) inspects every local aquarium instead.
 *
 * @author jonathan.colt
 */
public class PartitionComposter implements RowChanges {

    private static final MetricLogger LOG = MetricLoggerFactory.getLogger();

    // Non-null only between start() and stop().
    private ScheduledExecutorService scheduledThreadPool;

    private final AmzaStats amzaSystemStats;
    private final AmzaStats amzaStats;
    private final PartitionIndex partitionIndex;
    private final PartitionCreator partitionCreator;
    private final AmzaRingStoreReader amzaRingReader;
    private final PartitionStripeProvider partitionStripeProvider;
    private final StorageVersionProvider storageVersionProvider;
    private final AmzaInterner amzaInterner;
    private final StripingLocksProvider<PartitionName> stripingLocksProvider;

    // Serialized partition names (key == value) of partitions that must be re-examined on the next pass.
    private final ConcurrentBAHash<byte[]> dirtyPartitions;

    // True until the first full pass over all local aquariums has completed without throwing.
    private volatile boolean coldstart = true;

    /**
     * @param amzaSystemStats         stats used when expunging system partitions
     * @param amzaStats               stats used when expunging all other partitions
     * @param partitionIndex          index from which expunged partition versions are deleted
     * @param partitionCreator        consulted to decide whether a partition is defined on this node
     * @param amzaRingReader          consulted to decide whether this node is a member of a partition's ring
     * @param partitionStripeProvider gives transactional access to partitions and is notified of expunged versions
     * @param storageVersionProvider  consulted for the current partition version and to decode version-index keys
     * @param amzaInterner            used to intern partition names decoded from raw keys
     * @param concurrency             passed through to the {@link ConcurrentBAHash} that tracks dirty partitions
     */
    public PartitionComposter(AmzaStats amzaSystemStats,
        AmzaStats amzaStats,
        PartitionIndex partitionIndex,
        PartitionCreator partitionCreator,
        AmzaRingStoreReader amzaRingReader,
        PartitionStripeProvider partitionStripeProvider,
        StorageVersionProvider storageVersionProvider,
        AmzaInterner amzaInterner,
        int concurrency) {

        this.amzaSystemStats = amzaSystemStats;
        this.amzaStats = amzaStats;
        this.partitionIndex = partitionIndex;
        this.partitionCreator = partitionCreator;
        this.amzaRingReader = amzaRingReader;
        this.partitionStripeProvider = partitionStripeProvider;
        this.storageVersionProvider = storageVersionProvider;
        this.amzaInterner = amzaInterner;
        this.stripingLocksProvider = new StripingLocksProvider<>(64); //TODO config
        this.dirtyPartitions = new ConcurrentBAHash<>(3, false, concurrency);
    }

    /**
     * Starts a single background thread that runs {@link #compostAll()} immediately and then again one minute
     * after each run finishes.
     * <p>
     * Calling this method while the composter is already started is a no-op, so a repeated call cannot leak a
     * previously created executor.
     *
     * @throws Exception declared for interface compatibility with existing callers
     */
    public void start() throws Exception {
        if (scheduledThreadPool != null) {
            // Already running; creating another pool would orphan the existing one.
            return;
        }
        scheduledThreadPool = Executors.newScheduledThreadPool(1,
            new ThreadFactoryBuilder().setNameFormat("partition-composter-%d").build());
        scheduledThreadPool.scheduleWithFixedDelay(() -> {
            try {
                compostAll();
            } catch (Exception x) {
                // An exception escaping the task would suppress all subsequent executions.
                LOG.error("Failing to compost", x);
            }
        }, 0, 1, TimeUnit.MINUTES); // TODO config
    }

    /**
     * Stops the background thread, interrupting a pass that may be in progress.
     * <p>
     * Safe to call when the composter was never started or has already been stopped.
     *
     * @throws Exception declared for interface compatibility with existing callers
     */
    public void stop() throws Exception {
        ScheduledExecutorService pool = this.scheduledThreadPool;
        if (pool == null) {
            return;
        }
        pool.shutdownNow();
        this.scheduledThreadPool = null;
    }

    /**
     * Records which partitions were touched by a change to one of the system indexes so the next
     * {@link #compostAll()} pass re-examines them. Changes to any other partition are ignored.
     *
     * @param changes the applied row changes
     * @throws Exception if decoding a key fails
     */
    @Override
    public void changes(RowsChanged changes) throws Exception {
        PartitionName partitionName = changes.getVersionedPartitionName().getPartitionName();
        if (partitionName.equals(REGION_INDEX.getPartitionName())) {
            // Keys are stored as-is; compostAll() later interns them as partition names.
            for (WALKey key : changes.getApply().keySet()) {
                dirtyPartitions.put(key.key, key.key);
            }
        } else if (partitionName.equals(PARTITION_VERSION_INDEX.getPartitionName())) {
            for (WALKey key : changes.getApply().keySet()) {
                byte[] dirtyBytes = storageVersionProvider.partitionNameFromKey(key.key).toBytes();
                dirtyPartitions.put(dirtyBytes, dirtyBytes);
            }
        } else if (partitionName.equals(AQUARIUM_STATE_INDEX.getPartitionName())) {
            for (WALKey key : changes.getApply().keySet()) {
                AmzaAquariumProvider.streamStateKey(key.key, amzaInterner,
                    (dirtyPartitionName, context, rootRingMember, partitionVersion, isSelf, ackRingMember) -> {
                        byte[] dirtyBytes = dirtyPartitionName.toBytes();
                        dirtyPartitions.put(dirtyBytes, dirtyBytes);
                        return true;
                    });
            }
        }
    }

    /**
     * Runs one composting pass.
     * <p>
     * On cold start every local aquarium is examined; afterwards only the partitions recorded as dirty are.
     * Every partition version that was composted during the pass is then reported to
     * {@link PartitionStripeProvider#expunged(VersionedPartitionName)}. Exceptions raised while examining
     * partitions or while reporting an expunge are logged and not propagated.
     *
     * @throws Exception declared for interface compatibility with existing callers
     */
    public void compostAll() throws Exception {
        List<VersionedPartitionName> composted = new ArrayList<>();
        try {
            if (coldstart) {
                partitionStripeProvider.streamLocalAquariums((partitionName, ringMember, versionedAquarium) -> {
                    VersionedPartitionName versionedPartitionName = versionedAquarium.getVersionedPartitionName();
                    if (compostIfNecessary(versionedAquarium)) {
                        composted.add(versionedPartitionName);
                    }
                    return true;
                });
                // Only reached when the full scan did not throw; otherwise the next pass is a cold start again.
                coldstart = false;
            } else {
                dirtyPartitions.stream((key, value) -> {
                    PartitionName partitionName = amzaInterner.internPartitionName(key, 0, key.length);
                    // Remove before processing so a change arriving meanwhile re-marks the partition dirty.
                    dirtyPartitions.remove(key);
                    try {
                        partitionStripeProvider.txPartition(partitionName, (txPartitionStripe, highwaterStorage, versionedAquarium) -> {
                            VersionedPartitionName versionedPartitionName = versionedAquarium.getVersionedPartitionName();
                            if (compostIfNecessary(versionedAquarium)) {
                                composted.add(versionedPartitionName);
                            }
                            return null;
                        });
                        return true;
                    } catch (PartitionIsDisposedException e) {
                        LOG.info("Ignored disposed partition: {}", partitionName);
                        return true;
                    } catch (NotARingMemberException e) {
                        LOG.debug("Skipped compost for non-member partition without storage: {}", partitionName);
                        return true;
                    } catch (Throwable t) {
                        // Unexpected failure: put the partition back so a later pass retries it.
                        dirtyPartitions.put(key, key);
                        throw t;
                    }
                });
            }
        } catch (Exception e) {
            LOG.warn("Error while composting partitions", e);
        }

        // Partitions composted before a failure are still reported.
        for (VersionedPartitionName compost : composted) {
            try {
                partitionStripeProvider.expunged(compost);
            } catch (Exception e) {
                LOG.warn("Failed to expunge {}", new Object[]{compost}, e);
            }
        }
    }

    /**
     * Examines a single partition and, if it was composted, reports its version as expunged.
     * Unlike {@link #compostAll()}, exceptions are propagated to the caller.
     *
     * @param partitionName the partition to examine
     * @throws Exception if the partition transaction or the expunge notification fails
     */
    public void compostPartitionIfNecessary(PartitionName partitionName) throws Exception {
        VersionedPartitionName compost = partitionStripeProvider.txPartition(partitionName,
            (txPartitionStripe, highwaterStorage, versionedAquarium) -> {
                if (compostIfNecessary(versionedAquarium)) {
                    return versionedAquarium.getVersionedPartitionName();
                } else {
                    return null;
                }
            });
        if (compost != null) {
            partitionStripeProvider.expunged(compost);
        }
    }

    /**
     * Decides whether the given partition version must be removed from this node and, if so, deletes it.
     *
     * @param versionedAquarium aquarium of the partition version under consideration
     * @return {@code true} if {@link #deletePartition(VersionedPartitionName)} was invoked for the partition;
     *         {@code false} if it was left alone or merely suggested to move to {@link State#expunged}
     * @throws Exception if reading state, ring membership or partition definitions fails
     */
    private boolean compostIfNecessary(VersionedAquarium versionedAquarium) throws Exception {
        VersionedPartitionName versionedPartitionName = versionedAquarium.getVersionedPartitionName();
        PartitionName partitionName = versionedPartitionName.getPartitionName();
        // Decisions for partitions that share a lock stripe are serialized.
        synchronized (stripingLocksProvider.lock(partitionName, 0)) {
            State currentState = versionedAquarium.getLivelyEndState().getCurrentState();
            if (currentState == State.expunged // Aquarium State
                || !storageVersionProvider.isCurrentVersion(versionedPartitionName)) { // Partition Version
                deletePartition(versionedPartitionName);
                return true;
            } else if (!amzaRingReader.isMemberOfRing(partitionName.getRingName(), 0)) {
                if (currentState == State.bootstrap || currentState == null) {
                    LOG.info("Composting {} state:{} because we are not a member of the ring",
                        versionedPartitionName, currentState);
                    deletePartition(versionedPartitionName);
                    return true;
                } else {
                    // Not deleted yet: only ask the aquarium to move towards expunged.
                    LOG.info("Marking {} state:{} for disposal because we are not a member of the ring",
                        versionedPartitionName, currentState);
                    versionedAquarium.suggestState(State.expunged);
                    return false;
                }
            } else if (!partitionCreator.hasPartition(partitionName)) {
                if (currentState == State.bootstrap || currentState == null) {
                    LOG.info("Composting {} state:{} because no partition is defined on this node",
                        versionedPartitionName, currentState);
                    deletePartition(versionedPartitionName);
                    return true;
                } else {
                    // Not deleted yet: only ask the aquarium to move towards expunged.
                    LOG.info("Marking {} state:{} for disposal because no partition is defined on this node",
                        versionedPartitionName, currentState);
                    versionedAquarium.suggestState(State.expunged);
                    return false;
                }
            } else {
                return false;
            }
        }
    }

    /**
     * Deletes the delta, the indexed storage and the highwater marks of the given partition version, recording
     * the work as an {@code expunge} compaction in the matching stats (system vs. regular).
     * <p>
     * Failures are logged and swallowed.
     * NOTE(review): because failures are swallowed, callers still treat the partition as composted and will
     * report it as expunged even when the deletion failed -- confirm this best-effort behaviour is intended.
     *
     * @param versionedPartitionName the partition version to delete
     */
    private void deletePartition(VersionedPartitionName versionedPartitionName) {
        PartitionName partitionName = versionedPartitionName.getPartitionName();
        long partitionVersion = versionedPartitionName.getPartitionVersion();
        AmzaStats stats = partitionName.isSystemPartition() ? amzaSystemStats : amzaStats;
        CompactionStats compactionStats = stats.beginCompaction(CompactionFamily.expunge, versionedPartitionName.toString());
        try {
            LOG.info("Expunging {} {}.", partitionName, partitionVersion);
            partitionStripeProvider.txPartition(partitionName, (txPartitionStripe, highwaterStorage, versionedAquarium) -> {
                txPartitionStripe.tx((deltaIndex, stripeIndex, partitionStripe) -> {
                    partitionStripe.deleteDelta(versionedPartitionName);
                    partitionIndex.delete(versionedPartitionName, stripeIndex);
                    return null;
                });
                highwaterStorage.delete(versionedPartitionName);
                return null;
            });
            LOG.info("Expunged {} {}.", partitionName, partitionVersion);
        } catch (Exception e) {
            LOG.error("Failed to compost partition {}", new Object[]{versionedPartitionName}, e);
        } finally {
            // Always close out the compaction stat, whether or not the delete succeeded.
            compactionStats.finished();
        }
    }
}