package com.jivesoftware.os.amza.service.replication;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.jivesoftware.os.amza.api.partition.PartitionName;
import com.jivesoftware.os.amza.service.IndexedWALStorageProvider;
import com.jivesoftware.os.amza.service.StripingLocksProvider;
import com.jivesoftware.os.amza.service.stats.AmzaStats;
import com.jivesoftware.os.amza.service.storage.PartitionCreator;
import com.jivesoftware.os.amza.service.storage.PartitionIndex;
import com.jivesoftware.os.amza.service.storage.PartitionStore;
import com.jivesoftware.os.mlogger.core.MetricLogger;
import com.jivesoftware.os.mlogger.core.MetricLoggerFactory;
import java.io.File;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
/**
 * Schedules one recurring task per stripe that streams the active partitions and asks each
 * matching {@link PartitionStore} to compact its tombstones. For non-system partitions the
 * compaction may also move (rebalance) the partition onto a different stripe.
 *
 * <p>NOTE(review): {@link #start()} and {@link #stop()} are not synchronized, so lifecycle calls
 * should presumably come from a single thread - confirm against callers.
 *
 * @author jonathan.colt
 */
public class PartitionTombstoneCompactor {

    private static final MetricLogger LOG = MetricLoggerFactory.getLogger();

    /** While a stripe keeps failing, only every Nth consecutive failure is logged at error level. */
    private static final int SILENCE_BACK_TO_BACK_ERRORS = 100;

    private ScheduledExecutorService scheduledThreadPool;

    private final AmzaStats amzaStats;
    private final IndexedWALStorageProvider indexedWALStorageProvider;
    private final PartitionCreator partitionCreator;
    private final PartitionIndex partitionIndex;
    private final StorageVersionProvider storageVersionProvider;
    private final long checkIfTombstoneCompactionIsNeededIntervalInMillis;
    private final long rebalanceableEveryNMillis;
    private final int numberOfStripes;
    /** Per stripe: wall-clock millis after which that stripe counts as rebalanceable again. */
    private final long[] rebalanceableAfterTimestamp;
    private final StripingLocksProvider<PartitionName> locksProvider = new StripingLocksProvider<>(1024);

    /**
     * @param amzaStats stats sink (stored; not read by the code in this class)
     * @param indexedWALStorageProvider resolves base keys and rebalance targets for a partition
     * @param partitionCreator used to look up the {@link PartitionStore} and the partition disposal version
     * @param partitionIndex source of the active partitions to visit
     * @param storageVersionProvider supplies the stripe/storage version transaction for each partition
     * @param checkIfCompactionIsNeededIntervalInMillis initial delay and delay between passes of each stripe task
     * @param rebalanceableEveryNMillis how long a stripe is marked as paused after a pass that rebalanced nothing
     * @param numberOfStripes number of stripes; one scheduled task and one pause timestamp per stripe
     */
    public PartitionTombstoneCompactor(AmzaStats amzaStats,
        IndexedWALStorageProvider indexedWALStorageProvider,
        PartitionCreator partitionCreator,
        PartitionIndex partitionIndex,
        StorageVersionProvider storageVersionProvider,
        long checkIfCompactionIsNeededIntervalInMillis,
        long rebalanceableEveryNMillis,
        int numberOfStripes) {

        this.amzaStats = amzaStats;
        this.indexedWALStorageProvider = indexedWALStorageProvider;
        this.partitionCreator = partitionCreator;
        this.partitionIndex = partitionIndex;
        this.storageVersionProvider = storageVersionProvider;
        this.checkIfTombstoneCompactionIsNeededIntervalInMillis = checkIfCompactionIsNeededIntervalInMillis;
        this.rebalanceableEveryNMillis = rebalanceableEveryNMillis;
        this.numberOfStripes = numberOfStripes;
        this.rebalanceableAfterTimestamp = new long[numberOfStripes];
    }

    /**
     * Creates the scheduler and registers one fixed-delay compaction task per stripe.
     * Calling this while already started is a no-op.
     *
     * @throws Exception declared for interface compatibility
     */
    public void start() throws Exception {
        if (scheduledThreadPool != null) {
            // A second pool would overwrite the field and leave the first one unreachable by stop().
            return;
        }

        ThreadFactory threadFactory = new ThreadFactoryBuilder().setNameFormat("partition-tombstone-compactor-%d").build();
        ScheduledExecutorService pool = Executors.newScheduledThreadPool(numberOfStripes, threadFactory);
        // Publish the pool before scheduling so stop() can still shut it down if scheduling throws.
        scheduledThreadPool = pool;

        for (int i = 0; i < numberOfStripes; i++) {
            int stripe = i;
            // Consecutive failure count for this stripe's task; an array so the lambda can mutate it.
            int[] failedToCompact = { 0 };
            pool.scheduleWithFixedDelay(() -> {
                try {
                    compactTombstone(false, stripe);
                    // Reset only after a successful pass so back-to-back failures are actually counted.
                    failedToCompact[0] = 0;
                } catch (Exception x) {
                    // The exception is caught here so it never escapes the periodic task.
                    LOG.debug("Failing to compact tombstones.", x);
                    if (failedToCompact[0] % SILENCE_BACK_TO_BACK_ERRORS == 0) {
                        LOG.error("Failing to compact tombstones.", x);
                    }
                    failedToCompact[0]++;
                }
            }, checkIfTombstoneCompactionIsNeededIntervalInMillis, checkIfTombstoneCompactionIsNeededIntervalInMillis, TimeUnit.MILLISECONDS);
        }
    }

    /**
     * Shuts the scheduler down via {@code shutdownNow()} and forgets it.
     * Safe to call when {@link #start()} was never called or after a previous stop.
     *
     * @throws Exception declared for interface compatibility
     */
    public void stop() throws Exception {
        ScheduledExecutorService pool = this.scheduledThreadPool;
        if (pool == null) {
            return;
        }
        this.scheduledThreadPool = null;
        pool.shutdownNow();
    }

    /**
     * Visits every active partition and compacts the tombstones of those whose current stripe matches.
     * For non-system partitions, when {@code force} is set or rebalancing is currently active, the
     * compaction may target another stripe and the partition is then transitioned to it.
     *
     * <p>If a specific, tracked stripe was requested and nothing was rebalanced during the pass, that
     * stripe is marked as paused for {@code rebalanceableEveryNMillis}.
     *
     * @param force passed through as the "forced" flag of the store compaction; also enables the rebalance check
     * @param compactStripe stripe to restrict the pass to, or {@code -1} for all stripes
     * @throws Exception whatever the partition index, storage version provider or store propagate
     */
    public void compactTombstone(boolean force, int compactStripe) throws Exception {
        // Single-element array so the nested lambdas can count the rebalances performed in this pass.
        int[] rebalanced = new int[1];

        partitionIndex.streamActivePartitions((versionedPartitionName) -> {
            PartitionName partitionName = versionedPartitionName.getPartitionName();
            // NOTE(review): 123 is presumably a seed for the striped lock selection - confirm.
            synchronized (locksProvider.lock(partitionName, 123)) {
                storageVersionProvider.tx(partitionName,
                    null,
                    (deltaIndex, stripeIndex, storageVersion) -> {
                        boolean eligible = storageVersion != null
                            && stripeIndex != -1
                            && storageVersion.partitionVersion == versionedPartitionName.getPartitionVersion()
                            && (compactStripe == -1 || stripeIndex == compactStripe);
                        if (!eligible) {
                            return null;
                        }

                        PartitionStore partitionStore = partitionCreator.get("compact", versionedPartitionName, stripeIndex);
                        if (partitionStore == null) {
                            return null;
                        }

                        // Defaults: compact in place on the partition's current stripe.
                        boolean forced = force;
                        int compactToStripe = stripeIndex;
                        File fromBaseKey = indexedWALStorageProvider.baseKey(versionedPartitionName, stripeIndex);
                        File toBaseKey = fromBaseKey;
                        int rebalanceToStripe = -1;
                        long disposalVersion = -1;

                        if (!partitionName.isSystemPartition()) {
                            disposalVersion = partitionCreator.getPartitionDisposal(partitionName);
                            if (force || rebalancingIsActive()) {
                                rebalanceToStripe = indexedWALStorageProvider.rebalanceToStripe(versionedPartitionName,
                                    stripeIndex,
                                    partitionStore.getProperties());
                                if (rebalanceToStripe > -1) {
                                    // A rebalance target forces the compaction and redirects its output.
                                    forced = true;
                                    compactToStripe = rebalanceToStripe;
                                    toBaseKey = indexedWALStorageProvider.baseKey(versionedPartitionName, compactToStripe);
                                    LOG.info("Rebalancing by compacting {} from {}:{} to {}:{}",
                                        partitionName,
                                        stripeIndex,
                                        fromBaseKey,
                                        compactToStripe,
                                        toBaseKey);
                                }
                            }
                        }

                        int effectivelyFinalRebalanceToStripe = rebalanceToStripe;
                        partitionStore.compactTombstone(
                            forced,
                            fromBaseKey,
                            toBaseKey,
                            compactToStripe,
                            disposalVersion,
                            (transitionToCompactedTx) -> {
                                return storageVersionProvider.replaceOneWithAll(partitionName,
                                    () -> {
                                        return transitionToCompactedTx.tx(() -> {
                                            if (effectivelyFinalRebalanceToStripe != -1) {
                                                rebalanced[0]++;
                                                storageVersionProvider.transitionStripe(versionedPartitionName,
                                                    storageVersion,
                                                    effectivelyFinalRebalanceToStripe);
                                                LOG.info("Rebalancing transitioned {} to {}", partitionName, effectivelyFinalRebalanceToStripe);
                                            }
                                            return null;
                                        });
                                    });
                            });
                        return null;
                    });
                return true;
            }
        });

        // Only stripes that have a slot in the timestamp array can be paused; this also keeps an
        // out-of-range stripe from failing with an index error after the compaction work is done.
        boolean trackedStripe = compactStripe >= 0 && compactStripe < rebalanceableAfterTimestamp.length;
        if (trackedStripe && rebalanced[0] == 0) {
            long now = System.currentTimeMillis();
            if (now > rebalanceableAfterTimestamp[compactStripe]) {
                rebalanceableAfterTimestamp[compactStripe] = now + rebalanceableEveryNMillis;
                LOG.info("Rebalancing for stripe {} has been paused until {}", compactStripe, rebalanceableAfterTimestamp[compactStripe]);
            }
        }
    }

    /**
     * @return {@code true} if the pause timestamp of at least one stripe lies in the past
     */
    private boolean rebalancingIsActive() {
        long timestamp = System.currentTimeMillis();
        for (long rebalanceableAfter : rebalanceableAfterTimestamp) {
            if (timestamp > rebalanceableAfter) {
                return true;
            }
        }
        return false;
    }
}