package com.jivesoftware.os.amza.service;
import com.jivesoftware.os.amza.api.partition.PartitionProperties;
import com.jivesoftware.os.amza.api.partition.VersionedPartitionName;
import com.jivesoftware.os.amza.api.wal.PrimaryRowMarshaller;
import com.jivesoftware.os.amza.api.wal.WALIndex;
import com.jivesoftware.os.amza.api.wal.WALIndexProvider;
import com.jivesoftware.os.amza.service.stats.AmzaStats;
import com.jivesoftware.os.amza.service.storage.WALStorage;
import com.jivesoftware.os.amza.service.storage.binary.BinaryHighwaterRowMarshaller;
import com.jivesoftware.os.amza.service.storage.binary.BinaryWALTx;
import com.jivesoftware.os.amza.service.storage.binary.RowIOProvider;
import com.jivesoftware.os.jive.utils.ordered.id.TimestampedOrderIdProvider;
import com.jivesoftware.os.mlogger.core.MetricLogger;
import com.jivesoftware.os.mlogger.core.MetricLoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.Random;
import org.apache.commons.io.FileUtils;
/**
 * Builds {@link WALStorage} instances for partitions and decides which working directory and
 * bucket sub-directory a partition's files belong to.
 * <p>
 * On construction every existing working directory is scanned for {@code .kvt} files whose
 * numeric name hashes to a different bucket than the one they currently sit in; such files are
 * moved to the bucket their name hashes to.
 *
 * @author jonathan.colt
 */
public class IndexedWALStorageProvider {

    private static final MetricLogger LOG = MetricLoggerFactory.getLogger();

    /** Number of bucket sub-directories a working directory is divided into. */
    private static final int BUCKET_COUNT = 1024;

    // Multiplier, addend and 48-bit mask of the linear congruential step used by hash(long)
    // (the same constants java.util.Random uses for its generator).
    private static final long RAND_MULT = 0x5DEECE66DL;
    private static final long RAND_ADD = 0xBL;
    private static final long RAND_MASK = (1L << 48) - 1;

    private final Random rand = new Random();
    private final AmzaStats amzaStats;
    private final File[] workingDirectories;
    private final int numberOfStripes;
    private final WALIndexProviderRegistry indexProviderRegistry;
    private final PrimaryRowMarshaller primaryRowMarshaller;
    private final BinaryHighwaterRowMarshaller highwaterRowMarshaller;
    private final TimestampedOrderIdProvider orderIdProvider;
    private final SickPartitions sickPartitions;
    private final int tombstoneCompactionFactor;
    private final long rebalanceIfImbalanceGreaterThanInBytes;

    /**
     * Stores the collaborators and then relocates any misplaced {@code .kvt} files found under the
     * given working directories (see {@link #convert(File, File)} for what counts as misplaced).
     *
     * @param amzaStats handed to every {@link WALStorage} this provider creates
     * @param workingDirectories root directories; stripe {@code s} uses entry
     * {@code s % workingDirectories.length}
     * @param numberOfStripes total number of stripes, used when picking a rebalance target
     * @param indexProviderRegistry looked up by index class name for WAL index and row IO providers
     * @param primaryRowMarshaller handed to created WAL transactions and storages
     * @param highwaterRowMarshaller handed to created storages
     * @param orderIdProvider handed to created storages
     * @param sickPartitions handed to created storages
     * @param tombstoneCompactionFactor handed to created storages
     * @param rebalanceIfImbalanceGreaterThanInBytes free-space difference between working
     * directories above which {@link #rebalanceToStripe} may propose a move
     * @throws IOException if a misplaced file's target directory cannot be created or the file
     * cannot be renamed
     */
    public IndexedWALStorageProvider(AmzaStats amzaStats,
        File[] workingDirectories,
        int numberOfStripes,
        WALIndexProviderRegistry indexProviderRegistry,
        PrimaryRowMarshaller primaryRowMarshaller,
        BinaryHighwaterRowMarshaller highwaterRowMarshaller,
        TimestampedOrderIdProvider orderIdProvider,
        SickPartitions sickPartitions,
        int tombstoneCompactionFactor,
        long rebalanceIfImbalanceGreaterThanInBytes) throws IOException {

        this.amzaStats = amzaStats;
        this.workingDirectories = workingDirectories;
        this.numberOfStripes = numberOfStripes;
        this.indexProviderRegistry = indexProviderRegistry;
        this.primaryRowMarshaller = primaryRowMarshaller;
        this.highwaterRowMarshaller = highwaterRowMarshaller;
        this.orderIdProvider = orderIdProvider;
        this.sickPartitions = sickPartitions;
        this.tombstoneCompactionFactor = tombstoneCompactionFactor;
        this.rebalanceIfImbalanceGreaterThanInBytes = rebalanceIfImbalanceGreaterThanInBytes;

        for (File workingDirectory : workingDirectories) {
            relocateMisplacedWALs(workingDirectory);
        }
    }

    /**
     * Moves every {@code .kvt} file under {@code workingDirectory} that sits in the wrong bucket
     * into the bucket computed by {@link #convert(File, File)}. Does nothing if the directory
     * does not exist.
     */
    private static void relocateMisplacedWALs(File workingDirectory) throws IOException {
        if (!workingDirectory.exists()) {
            return;
        }
        for (File file : FileUtils.listFiles(workingDirectory, new String[] { "kvt" }, true)) {
            File dest = convert(workingDirectory, file);
            if (dest == null) {
                continue;
            }
            File destParent = dest.getParentFile();
            // mkdirs() returns false when the directory already exists, hence the second check.
            if (!destParent.mkdirs() && !destParent.exists()) {
                throw new IOException("Failed to mkdirs for " + dest);
            }
            if (!file.renameTo(dest)) {
                throw new IOException("Failed to rename " + file + " to " + dest);
            }
            LOG.info("We repaired WAL from {} to {}", file, dest);
        }
    }

    /**
     * Computes where {@code file} should live if its current bucket directory is wrong.
     * <p>
     * The file is expected at {@code <workingDirectory>/<bucket>/<rest...>/<name>.<ext>}. When
     * {@code <name>} parses as a long, the expected bucket is {@code hash(name) % 1024}; if that
     * differs from {@code <bucket>} the same relative path under the expected bucket is returned.
     *
     * @return the corrected location, or {@code null} when the file is already in the right
     * bucket, its name or bucket is not numeric, or it is not inside a bucket directory of
     * {@code workingDirectory}
     */
    private static File convert(File workingDirectory, File file) {
        String filename = file.getName();
        int dot = filename.indexOf('.');
        String partition = (dot < 0) ? filename : filename.substring(0, dot);
        try {
            long partitionVersion = Long.parseLong(partition); // 12345

            String workingPath = workingDirectory.getAbsolutePath();
            String prefix = workingPath.endsWith(File.separator) ? workingPath : workingPath + File.separator;
            String filePath = file.getAbsolutePath();
            if (!filePath.startsWith(prefix)) {
                LOG.info("Skipped repair for WAL {} because it is not under {}", file, workingDirectory);
                return null;
            }

            String trailingPath = filePath.substring(prefix.length()); // 0/v15/backup/12345.kvt
            int firstSeparator = trailingPath.indexOf(File.separatorChar); // 1
            if (firstSeparator < 0) {
                // The file sits directly in the working directory, so there is no bucket to compare.
                LOG.info("Skipped repair for WAL {} because it is not inside a bucket of {}", file, workingDirectory);
                return null;
            }

            int currentModulo = Integer.parseInt(trailingPath.substring(0, firstSeparator)); // 0
            String partitionPath = trailingPath.substring(firstSeparator); // /v15/backup/12345.kvt
            long actualModulo = hash(partitionVersion) % BUCKET_COUNT;
            if (currentModulo != actualModulo) {
                return new File(workingDirectory, actualModulo + partitionPath);
            }
            LOG.info("Skipped repair for WAL {}", partition);
        } catch (NumberFormatException e) {
            // Non-numeric names (or bucket directories) are left where they are.
            LOG.info("Did not repair system WAL {}", partition);
        }
        return null;
    }

    /**
     * Manual check: prints the result of {@link #convert(File, File)} for a few sample paths and
     * then how the even numbers below 10,000 distribute over the 1024 buckets.
     */
    public static void main(String[] args) {
        File numericWAL = new File("/example/barf/0/v15/backup/12345.kvt");
        printConversion(new File("/example/barf"), numericWAL);
        printConversion(new File("/example/barf/"), numericWAL);
        printConversion(new File("/example/barf/"), new File("/example/barf/0/v15/backup/AAAAAAA==.kvt"));

        int[] count = new int[BUCKET_COUNT];
        for (int i = 0; i < 10_000; i += 2) {
            int bucket = (int) (hash((long) i) % BUCKET_COUNT);
            count[bucket]++;
        }
        for (int i = 0; i < BUCKET_COUNT; i++) {
            System.out.println(i + ". " + count[i]);
        }
    }

    /** Prints {@code from} and the location {@link #convert(File, File)} proposes for it. */
    private static void printConversion(File workingDirectory, File from) {
        File dest = convert(workingDirectory, from);
        System.out.println(from.getAbsolutePath() + " to " + (dest == null ? "null" : dest.getAbsolutePath()));
    }

    /**
     * Returns the name used for a partition's WAL: the Base64 form of the versioned partition name
     * when its version equals {@code VersionedPartitionName.STATIC_VERSION}, otherwise the
     * partition version in decimal.
     */
    private String name(VersionedPartitionName versionedPartitionName) throws IOException {
        long partitionVersion = versionedPartitionName.getPartitionVersion();
        if (partitionVersion == VersionedPartitionName.STATIC_VERSION) {
            return versionedPartitionName.toBase64();
        }
        return String.valueOf(partitionVersion);
    }

    /**
     * Proposes a stripe to move the given partition to when the working directories' free space
     * is too uneven.
     * <p>
     * A move is proposed only when all of the following hold: the difference between the most and
     * least free working directory exceeds the configured threshold, {@code stripe} currently maps
     * to the least free directory, a randomly picked stripe mapping to the most free directory
     * differs from {@code stripe}, and twice the WAL's size is below the threshold.
     *
     * @param versionedPartitionName partition whose WAL would be moved
     * @param stripe stripe the partition currently lives in
     * @param partitionProperties supplies the index class name used to look up the row IO provider
     * @return the stripe to move to, or {@code -1} when no move is proposed
     * @throws Exception if sizing the WAL fails
     */
    public int rebalanceToStripe(VersionedPartitionName versionedPartitionName,
        int stripe,
        PartitionProperties partitionProperties) throws Exception {

        int directoryCount = workingDirectories.length;
        if (directoryCount == 0) {
            return -1; // nothing to balance between
        }

        long maxFree = Long.MIN_VALUE;
        int maxFreeIndex = -1;
        long minFree = Long.MAX_VALUE;
        int minFreeIndex = -1;
        for (int i = 0; i < directoryCount; i++) {
            long free = workingDirectories[i].getFreeSpace();
            if (free < minFree) {
                minFree = free;
                minFreeIndex = i;
            }
            if (free > maxFree) {
                maxFree = free;
                maxFreeIndex = i;
            }
        }

        long imbalance = maxFree - minFree;
        if (imbalance <= rebalanceIfImbalanceGreaterThanInBytes) {
            return -1;
        }

        // Number of stripes s in [0, numberOfStripes) with s % directoryCount == maxFreeIndex.
        int maxStripeCount = (numberOfStripes / directoryCount)
            + (maxFreeIndex < (numberOfStripes % directoryCount) ? 1 : 0);
        if (maxStripeCount <= 0) {
            // No stripe maps to the emptiest directory (fewer stripes than directories);
            // Random.nextInt would reject a non-positive bound.
            return -1;
        }

        int candidateStripe = maxFreeIndex + (directoryCount * rand.nextInt(maxStripeCount));
        if (stripe % directoryCount != minFreeIndex || candidateStripe == stripe) {
            return -1;
        }

        String providerName = partitionProperties.indexClassName;
        @SuppressWarnings("unchecked")
        RowIOProvider rowIOProvider = indexProviderRegistry.getRowIOProvider(providerName);
        long sizeOfWAL = BinaryWALTx.sizeInBytes(baseKey(versionedPartitionName, stripe), name(versionedPartitionName), rowIOProvider);
        // the times 2 says our index shouldn't be any bigger than our wal ;)
        if (sizeOfWAL * 2 < rebalanceIfImbalanceGreaterThanInBytes) {
            return candidateStripe;
        }
        return -1;
    }

    /**
     * Returns the bucket directory for a partition in a stripe: the working directory at index
     * {@code stripe % workingDirectories.length}, with a child named after the partition's hash
     * modulo 1024.
     */
    public File baseKey(VersionedPartitionName versionedPartitionName, int stripe) {
        File workingDirectory = workingDirectories[stripe % workingDirectories.length];
        long bucket = hash(versionedPartitionName) % BUCKET_COUNT;
        return new File(workingDirectory, String.valueOf(bucket));
    }

    /**
     * Hash used for bucket selection: system partitions use their partition version unchanged,
     * all other partitions use {@link #hash(long)} of their partition version.
     */
    private static long hash(VersionedPartitionName versionedPartitionName) {
        long partitionVersion = versionedPartitionName.getPartitionVersion();
        if (versionedPartitionName.getPartitionName().isSystemPartition()) {
            return partitionVersion;
        }
        return hash(partitionVersion);
    }

    /**
     * Scrambles {@code partitionVersion} with one linear congruential step and returns the upper
     * 32 of the resulting 48 bits. The result is always in {@code [0, 2^32)}: the value is masked
     * to 48 bits before the unsigned shift, so it can never be negative.
     */
    private static long hash(long partitionVersion) {
        long scrambled = (partitionVersion * RAND_MULT + RAND_ADD) & RAND_MASK;
        return scrambled >>> 16;
    }

    /**
     * Creates the {@link WALStorage} for a partition, using the WAL index provider and row IO
     * provider registered under the partition's index class name.
     *
     * @param versionedPartitionName partition to create storage for
     * @param partitionProperties supplies the index class name, updates between leaps and max leaps
     * @param <I> WAL index type produced by the registered index provider
     * @return a new storage instance; hard fsync before leap boundaries is enabled only for system
     * partitions
     * @throws Exception if provider lookup or name resolution fails
     */
    public <I extends WALIndex> WALStorage<I> create(
        VersionedPartitionName versionedPartitionName,
        PartitionProperties partitionProperties) throws Exception {

        String providerName = partitionProperties.indexClassName;
        @SuppressWarnings("unchecked")
        WALIndexProvider<I> walIndexProvider = (WALIndexProvider<I>) indexProviderRegistry.getWALIndexProvider(providerName);
        @SuppressWarnings("unchecked")
        RowIOProvider rowIOProvider = indexProviderRegistry.getRowIOProvider(providerName);

        BinaryWALTx binaryWALTx = new BinaryWALTx(
            name(versionedPartitionName),
            rowIOProvider,
            primaryRowMarshaller,
            partitionProperties.updatesBetweenLeaps,
            partitionProperties.maxLeaps);

        boolean hardFsyncBeforeLeapBoundary = versionedPartitionName.getPartitionName().isSystemPartition();
        return new WALStorage<>(amzaStats,
            versionedPartitionName,
            orderIdProvider,
            primaryRowMarshaller,
            highwaterRowMarshaller,
            binaryWALTx,
            walIndexProvider,
            sickPartitions,
            hardFsyncBeforeLeapBoundary,
            tombstoneCompactionFactor);
    }
}