package com.jivesoftware.os.amza.service.storage;
import com.google.common.base.Preconditions;
import com.jivesoftware.os.amza.api.TimestampedValue;
import com.jivesoftware.os.amza.api.partition.Durability;
import com.jivesoftware.os.amza.api.partition.HighestPartitionTx;
import com.jivesoftware.os.amza.api.partition.PartitionName;
import com.jivesoftware.os.amza.api.partition.VersionedAquarium;
import com.jivesoftware.os.amza.api.partition.VersionedPartitionName;
import com.jivesoftware.os.amza.api.scan.RowChanges;
import com.jivesoftware.os.amza.api.scan.RowStream;
import com.jivesoftware.os.amza.api.scan.RowsChanged;
import com.jivesoftware.os.amza.api.stream.Commitable;
import com.jivesoftware.os.amza.api.stream.KeyContainedStream;
import com.jivesoftware.os.amza.api.stream.KeyValueStream;
import com.jivesoftware.os.amza.api.stream.RowType;
import com.jivesoftware.os.amza.api.stream.TxKeyValueStream;
import com.jivesoftware.os.amza.api.stream.UnprefixedWALKeys;
import com.jivesoftware.os.amza.api.take.Highwaters;
import com.jivesoftware.os.amza.api.wal.PrimaryRowMarshaller;
import com.jivesoftware.os.amza.api.wal.WALHighwater;
import com.jivesoftware.os.amza.api.wal.WALUpdated;
import com.jivesoftware.os.amza.service.replication.AsyncStripeFlusher;
import com.jivesoftware.os.amza.service.replication.PartitionStripe;
import com.jivesoftware.os.amza.service.stats.AmzaStats;
import com.jivesoftware.os.aquarium.LivelyEndState;
/**
 * Facade over the write-ahead-log storage of system partitions.
 * <p>
 * Each operation resolves the target {@link PartitionStore} through the {@link PartitionIndex} and
 * delegates to it (or to its WAL storage), recording statistics in {@link AmzaStats} where the
 * operation does so. Operations that validate their argument reject non-system partitions with an
 * {@link IllegalArgumentException}; operations that need a store fail with an
 * {@link IllegalStateException} when the index has none for the requested partition.
 *
 * @author jonathan.colt
 */
public class SystemWALStorage {

    private final AmzaStats amzaStats;
    private final PartitionIndex partitionIndex;
    private final PrimaryRowMarshaller rowMarshaller;
    private final HighwaterRowMarshaller<byte[]> highwaterRowMarshaller;
    private final RowChanges allRowChanges;
    private final AsyncStripeFlusher systemFlusher;
    private final boolean hardFlush;

    /**
     * @param amzaStats sink for the statistics recorded by the operations of this class
     * @param partitionIndex index used to resolve the store of a system partition
     * @param rowMarshaller converts primary rows into key/value callbacks when taking by transaction id
     * @param highwaterRowMarshaller decodes highwater rows when taking by transaction id
     * @param allRowChanges notified of every non-empty change set produced by {@link #update}; may be {@code null}
     * @param systemFlusher flusher that is force-flushed after every {@link #update}
     * @param hardFlush value handed to {@code PartitionStore.flush(boolean)} after every {@link #update}
     */
    public SystemWALStorage(AmzaStats amzaStats,
        PartitionIndex partitionIndex,
        PrimaryRowMarshaller rowMarshaller,
        HighwaterRowMarshaller<byte[]> highwaterRowMarshaller,
        RowChanges allRowChanges,
        AsyncStripeFlusher systemFlusher,
        boolean hardFlush) {
        this.amzaStats = amzaStats;
        this.partitionIndex = partitionIndex;
        this.rowMarshaller = rowMarshaller;
        this.highwaterRowMarshaller = highwaterRowMarshaller;
        this.allRowChanges = allRowChanges;
        this.systemFlusher = systemFlusher;
        this.hardFlush = hardFlush;
    }

    /**
     * Reports the highest transaction id of each given system partition to {@code tx}.
     *
     * @param systemPartitionNames partitions to report; each must be a system partition
     * @param tx callback invoked once per partition with a {@link VersionedAquarium} (built with a
     *     {@code null} second argument) and the value of {@link #highestPartitionTxId}
     * @throws Exception if resolving a transaction id or the callback fails
     */
    public void load(Iterable<VersionedPartitionName> systemPartitionNames, HighestPartitionTx tx) throws Exception {
        for (VersionedPartitionName versionedPartitionName : systemPartitionNames) {
            long highestTxId = highestPartitionTxId(versionedPartitionName);
            tx.tx(new VersionedAquarium(versionedPartitionName, null), highestTxId);
        }
    }

    /**
     * Applies {@code updates} to the WAL of a system partition, notifies listeners, flushes, and
     * records statistics.
     *
     * @param versionedPartitionName target partition; must be a system partition
     * @param prefix prefix forwarded to the WAL update
     * @param updates the changes to commit
     * @param updated notified with the largest committed transaction id when at least one change was applied
     * @return the change set reported by the WAL storage
     * @throws IllegalArgumentException if the partition is not a system partition
     * @throws IllegalStateException if no store exists for the partition
     * @throws Exception if the underlying storage fails
     */
    public RowsChanged update(VersionedPartitionName versionedPartitionName,
        byte[] prefix,
        Commitable updates,
        WALUpdated updated) throws Exception {

        PartitionName partitionName = versionedPartitionName.getPartitionName();
        PartitionStore partitionStore = systemPartitionStore(versionedPartitionName);

        // The literal arguments (true, -1, false) are positional options of WALStorage.update;
        // their meaning is defined there and is not visible from this class.
        RowsChanged changed = partitionStore.getWalStorage().update(amzaStats.updateIoStats,
            true,
            partitionStore.getProperties().rowType,
            -1,
            false,
            prefix,
            updates);

        if (allRowChanges != null && !changed.isEmpty()) {
            allRowChanges.changes(changed);
        }
        if (!changed.getApply().isEmpty()) {
            updated.updated(versionedPartitionName, changed.getLargestCommittedTxId());
        }

        partitionStore.flush(hardFlush);
        systemFlusher.forceFlush(Durability.fsync_async, 0);
        amzaStats.direct(partitionName, changed.getApply().size(), changed.getSmallestCommittedTxId());
        return changed;
    }

    /**
     * Flushes the store of the given partition, passing {@code true} to {@code PartitionStore.flush}.
     * Unlike the other operations this one does not verify that the partition is a system partition.
     *
     * @param versionedPartitionName partition whose store is flushed
     * @throws IllegalStateException if no store exists for the partition
     * @throws Exception if the flush fails
     */
    public void flush(VersionedPartitionName versionedPartitionName) throws Exception {
        PartitionStore partitionStore = requireStore(versionedPartitionName, partitionIndex.getSystemPartition(versionedPartitionName));
        partitionStore.flush(true);
    }

    /**
     * Looks up a single timestamped value and records one "get" with its elapsed milliseconds.
     *
     * @param versionedPartitionName partition to read; must be a system partition
     * @param prefix key prefix
     * @param key key to look up
     * @return whatever the partition store returns for the key
     * @throws IllegalArgumentException if the partition is not a system partition
     * @throws IllegalStateException if no store exists for the partition
     * @throws Exception if the underlying storage fails
     */
    public TimestampedValue getTimestampedValue(VersionedPartitionName versionedPartitionName, byte[] prefix, byte[] key) throws Exception {
        PartitionStore partitionStore = systemPartitionStore(versionedPartitionName);
        long start = System.currentTimeMillis();
        TimestampedValue timestampedValue = partitionStore.getTimestampedValue(prefix, key);
        amzaStats.gets(versionedPartitionName.getPartitionName(), 1, System.currentTimeMillis() - start);
        return timestampedValue;
    }

    /**
     * Streams the values of {@code keys} to {@code stream} and records one "get" with its elapsed
     * milliseconds.
     * <p>
     * The caller's {@code prefix} is always echoed to {@code stream}. An entry whose timestamp is
     * {@code -1} is forwarded as {@code (prefix, key, null, -1, false, -1)}.
     *
     * @param versionedPartitionName partition to read; must be a system partition
     * @param prefix key prefix
     * @param keys keys to look up
     * @param stream receives one callback per streamed key
     * @return the result of the store's {@code streamValues} call
     * @throws IllegalArgumentException if the partition is not a system partition
     * @throws IllegalStateException if no store exists for the partition
     * @throws Exception if the underlying storage or the callback fails
     */
    public boolean get(VersionedPartitionName versionedPartitionName,
        byte[] prefix,
        UnprefixedWALKeys keys,
        KeyValueStream stream) throws Exception {

        PartitionStore partitionStore = systemPartitionStore(versionedPartitionName);
        long start = System.currentTimeMillis();
        boolean got = partitionStore.streamValues(prefix, keys,
            (storedPrefix, key, value, valueTimestamp, valueTombstone, valueVersion) -> {
                // NOTE(review): a timestamp of -1 presumably marks a key with no stored value - confirm.
                if (valueTimestamp == -1) {
                    return stream.stream(prefix, key, null, -1, false, -1);
                }
                return stream.stream(prefix, key, value, valueTimestamp, valueTombstone, valueVersion);
            });
        amzaStats.gets(versionedPartitionName.getPartitionName(), 1, System.currentTimeMillis() - start);
        return got;
    }

    /**
     * Delegates a key-containment check to the partition store.
     *
     * @param versionedPartitionName partition to read; must be a system partition
     * @param prefix key prefix
     * @param keys keys to test
     * @param stream receives the containment callbacks
     * @return the result of the store's {@code containsKeys} call
     * @throws IllegalArgumentException if the partition is not a system partition
     * @throws IllegalStateException if no store exists for the partition
     * @throws Exception if the underlying storage or the callback fails
     */
    public boolean containsKeys(VersionedPartitionName versionedPartitionName,
        byte[] prefix,
        UnprefixedWALKeys keys,
        KeyContainedStream stream) throws Exception {
        return systemPartitionStore(versionedPartitionName).containsKeys(prefix, keys, stream);
    }

    /**
     * Hands {@code takeRowUpdates} a streamer over the rows written since {@code transactionId},
     * together with {@link LivelyEndState#ALWAYS_ONLINE}.
     *
     * @param versionedPartitionName partition to read; must be a system partition
     * @param transactionId transaction id forwarded to the store's {@code takeRowUpdatesSince}
     * @param takeRowUpdates consumer that decides whether and how to run the streamer
     * @param <R> result type produced by {@code takeRowUpdates}
     * @return whatever {@code takeRowUpdates} returns
     * @throws IllegalArgumentException if the partition is not a system partition
     * @throws IllegalStateException when the streamer is invoked and no store exists for the partition
     * @throws Exception if the underlying storage or the consumer fails
     */
    public <R> R takeRowUpdatesSince(VersionedPartitionName versionedPartitionName,
        long transactionId,
        PartitionStripe.TakeRowUpdates<R> takeRowUpdates) throws Exception {

        Preconditions.checkArgument(versionedPartitionName.getPartitionName().isSystemPartition(), "Must be a system partition");
        PartitionStore partitionStore = partitionIndex.getSystemPartition(versionedPartitionName);
        // The store is only required once the streamer actually runs, so a consumer that never
        // streams is unaffected by a missing store.
        PartitionStripe.RowStreamer streamer =
            rowStream -> requireStore(versionedPartitionName, partitionStore).takeRowUpdatesSince(transactionId, rowStream);
        return takeRowUpdates.give(versionedPartitionName, LivelyEndState.ALWAYS_ONLINE, streamer);
    }

    /**
     * Streams highwaters and key/values of the rows taken since {@code transactionId}.
     *
     * @param versionedPartitionName partition to read; must be a system partition
     * @param transactionId only primary rows with a strictly greater transaction id are streamed
     * @param highwaters receives decoded highwater rows; highwater rows are skipped when {@code null}
     * @param txKeyValueStream receives the key/values of qualifying primary rows
     * @return the result of the WAL storage's {@code takeRowUpdatesSince} call
     * @throws IllegalArgumentException if the partition is not a system partition
     * @throws IllegalStateException if no store exists for the partition
     * @throws Exception if the underlying storage or a callback fails
     */
    public boolean takeFromTransactionId(VersionedPartitionName versionedPartitionName,
        long transactionId,
        Highwaters highwaters,
        TxKeyValueStream txKeyValueStream)
        throws Exception {
        return systemPartitionStore(versionedPartitionName).getWalStorage().takeRowUpdatesSince(amzaStats.takeIoStats, transactionId,
            (rowFP, rowTxId, rowType, row) -> streamTakenRow(rowFP, rowTxId, rowType, row, transactionId, highwaters, txKeyValueStream));
    }

    /**
     * Prefix-restricted variant of {@link #takeFromTransactionId(VersionedPartitionName, long, Highwaters, TxKeyValueStream)}.
     *
     * @param versionedPartitionName partition to read; must be a system partition
     * @param prefix prefix forwarded to the WAL storage's {@code takeRowUpdatesSince}
     * @param transactionId only primary rows with a strictly greater transaction id are streamed
     * @param highwaters receives decoded highwater rows; highwater rows are skipped when {@code null}
     * @param txKeyValueStream receives the key/values of qualifying primary rows
     * @return the result of the WAL storage's {@code takeRowUpdatesSince} call
     * @throws IllegalArgumentException if the partition is not a system partition
     * @throws IllegalStateException if no store exists for the partition
     * @throws Exception if the underlying storage or a callback fails
     */
    public boolean takeFromTransactionId(VersionedPartitionName versionedPartitionName,
        byte[] prefix,
        long transactionId,
        Highwaters highwaters,
        TxKeyValueStream txKeyValueStream)
        throws Exception {
        return systemPartitionStore(versionedPartitionName).getWalStorage().takeRowUpdatesSince(prefix, transactionId,
            (rowFP, rowTxId, rowType, row) -> streamTakenRow(rowFP, rowTxId, rowType, row, transactionId, highwaters, txKeyValueStream));
    }

    /**
     * Streams the raw rows taken since {@code transactionId} to {@code rowStream}.
     *
     * @param versionedPartitionName partition to read; must be a system partition
     * @param transactionId transaction id forwarded to the WAL storage
     * @param rowStream receives the raw rows
     * @return the result of the WAL storage's {@code takeRowUpdatesSince} call
     * @throws IllegalArgumentException if the partition is not a system partition
     * @throws IllegalStateException if no store exists for the partition
     * @throws Exception if the underlying storage or the callback fails
     */
    public boolean takeRowsFromTransactionId(VersionedPartitionName versionedPartitionName, long transactionId, RowStream rowStream)
        throws Exception {
        return systemPartitionStore(versionedPartitionName).getWalStorage().takeRowUpdatesSince(amzaStats.takeIoStats, transactionId, rowStream);
    }

    /**
     * Scans all rows of a system partition and records one scan with its elapsed milliseconds
     * ({@code scans} when values are hydrated, {@code scanKeys} otherwise).
     *
     * @param versionedPartitionName partition to scan; must be a system partition
     * @param keyValueStream receives the scanned entries
     * @param hydrateValues forwarded to the WAL storage; also selects which statistic is recorded
     * @return the result of the WAL storage's {@code rowScan} call
     * @throws IllegalArgumentException if the partition is not a system partition
     * @throws IllegalStateException if no store exists for the partition
     * @throws Exception if the underlying storage or the callback fails
     */
    public boolean rowScan(VersionedPartitionName versionedPartitionName, KeyValueStream keyValueStream, boolean hydrateValues) throws Exception {
        PartitionStore partitionStore = systemPartitionStore(versionedPartitionName);
        long start = System.currentTimeMillis();
        boolean got = partitionStore.getWalStorage().rowScan(keyValueStream, hydrateValues);
        long elapsed = System.currentTimeMillis() - start;
        if (hydrateValues) {
            amzaStats.scans(versionedPartitionName.getPartitionName(), 1, elapsed);
        } else {
            amzaStats.scanKeys(versionedPartitionName.getPartitionName(), 1, elapsed);
        }
        return got;
    }

    /**
     * Scans a key range of a system partition and records one scan with its elapsed milliseconds
     * ({@code scans} when values are hydrated, {@code scanKeys} otherwise).
     *
     * @param versionedPartitionName partition to scan; must be a system partition
     * @param fromPrefix prefix of the range start
     * @param fromKey key of the range start
     * @param toPrefix prefix of the range end
     * @param toKey key of the range end
     * @param keyValueStream receives the scanned entries
     * @param hydrateValues forwarded to the WAL storage; also selects which statistic is recorded
     * @return the result of the WAL storage's {@code rangeScan} call
     * @throws IllegalArgumentException if the partition is not a system partition
     * @throws IllegalStateException if no store exists for the partition
     * @throws Exception if the underlying storage or the callback fails
     */
    public boolean rangeScan(VersionedPartitionName versionedPartitionName,
        byte[] fromPrefix,
        byte[] fromKey,
        byte[] toPrefix,
        byte[] toKey,
        KeyValueStream keyValueStream,
        boolean hydrateValues) throws Exception {

        PartitionStore partitionStore = systemPartitionStore(versionedPartitionName);
        long start = System.currentTimeMillis();
        boolean got = partitionStore.getWalStorage().rangeScan(fromPrefix, fromKey, toPrefix, toKey, keyValueStream, hydrateValues);
        long elapsed = System.currentTimeMillis() - start;
        if (hydrateValues) {
            amzaStats.scans(versionedPartitionName.getPartitionName(), 1, elapsed);
        } else {
            amzaStats.scanKeys(versionedPartitionName.getPartitionName(), 1, elapsed);
        }
        return got;
    }

    /**
     * Returns the highest transaction id of a system partition.
     *
     * @param versionedPartitionName partition to inspect; must be a system partition
     * @return the WAL storage's highest transaction id, or {@code -1} when no store exists for the partition
     * @throws IllegalArgumentException if the partition is not a system partition
     * @throws Exception if the underlying storage fails
     */
    public long highestPartitionTxId(VersionedPartitionName versionedPartitionName) throws Exception {
        Preconditions.checkArgument(versionedPartitionName.getPartitionName().isSystemPartition(), "Must be a system partition");
        PartitionStore partitionStore = partitionIndex.getSystemPartition(versionedPartitionName);
        // A missing store is deliberately tolerated here (load() relies on it) and reported as -1.
        if (partitionStore == null) {
            return -1;
        }
        return partitionStore.getWalStorage().highestTxId();
    }

    /**
     * Counts the entries of a system partition using a filter that accepts every key stream.
     *
     * @param versionedPartitionName partition to count; must be a system partition
     * @return the result of the WAL storage's {@code count} call
     * @throws IllegalArgumentException if the partition is not a system partition
     * @throws IllegalStateException if no store exists for the partition
     * @throws Exception if the underlying storage fails
     */
    public long count(VersionedPartitionName versionedPartitionName) throws Exception {
        return systemPartitionStore(versionedPartitionName).getWalStorage().count(keyStream -> true);
    }

    /**
     * Returns the WAL storage's approximate entry count for a system partition.
     *
     * @param versionedPartitionName partition to count; must be a system partition
     * @return the result of the WAL storage's {@code approximateCount} call
     * @throws IllegalArgumentException if the partition is not a system partition
     * @throws IllegalStateException if no store exists for the partition
     * @throws Exception if the underlying storage fails
     */
    public long approximateCount(VersionedPartitionName versionedPartitionName) throws Exception {
        return systemPartitionStore(versionedPartitionName).getWalStorage().approximateCount();
    }

    /** Validates that the partition is a system partition and returns its store, failing if there is none. */
    private PartitionStore systemPartitionStore(VersionedPartitionName versionedPartitionName) throws Exception {
        Preconditions.checkArgument(versionedPartitionName.getPartitionName().isSystemPartition(), "Must be a system partition");
        return requireStore(versionedPartitionName, partitionIndex.getSystemPartition(versionedPartitionName));
    }

    /** Returns {@code partitionStore}, or throws {@link IllegalStateException} naming the partition when it is {@code null}. */
    private static PartitionStore requireStore(VersionedPartitionName versionedPartitionName, PartitionStore partitionStore) {
        if (partitionStore == null) {
            throw new IllegalStateException("No partition defined for " + versionedPartitionName);
        }
        return partitionStore;
    }

    /**
     * Handles one taken row: highwater rows go to {@code highwaters} (when non-null), primary rows
     * newer than {@code transactionId} are unmarshalled into {@code txKeyValueStream}, and every
     * other row is skipped.
     *
     * @return {@code true} for highwater and skipped rows; otherwise the marshaller's result
     */
    private boolean streamTakenRow(long rowFP,
        long rowTxId,
        RowType rowType,
        byte[] row,
        long transactionId,
        Highwaters highwaters,
        TxKeyValueStream txKeyValueStream) throws Exception {

        if (rowType == RowType.highwater && highwaters != null) {
            WALHighwater highwater = highwaterRowMarshaller.fromBytes(row);
            highwaters.highwater(highwater);
        } else if (rowType.isPrimary() && rowTxId > transactionId) {
            return rowMarshaller.fromRows(txFpRowStream -> txFpRowStream.stream(rowTxId, rowFP, rowType, row), txKeyValueStream);
        }
        return true;
    }
}