/*
* Copyright 2013 Jive Software, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.jivesoftware.os.amza.service.storage;
import com.jivesoftware.os.amza.api.TimestampedValue;
import com.jivesoftware.os.amza.api.partition.PartitionProperties;
import com.jivesoftware.os.amza.api.partition.VersionedPartitionName;
import com.jivesoftware.os.amza.api.scan.RangeScannable;
import com.jivesoftware.os.amza.api.scan.RowStream;
import com.jivesoftware.os.amza.api.scan.RowsChanged;
import com.jivesoftware.os.amza.api.stream.Commitable;
import com.jivesoftware.os.amza.api.stream.KeyContainedStream;
import com.jivesoftware.os.amza.api.stream.KeyValueStream;
import com.jivesoftware.os.amza.api.stream.UnprefixedWALKeys;
import com.jivesoftware.os.amza.service.stats.AmzaStats;
import com.jivesoftware.os.amza.service.stats.AmzaStats.CompactionFamily;
import com.jivesoftware.os.amza.service.storage.WALStorage.TxTransitionToCompacted;
import com.jivesoftware.os.jive.utils.ordered.id.TimestampedOrderIdProvider;
import com.jivesoftware.os.mlogger.core.MetricLogger;
import com.jivesoftware.os.mlogger.core.MetricLoggerFactory;
import java.io.File;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Per-partition facade over a {@link WALStorage}.
 * <p>
 * Most methods delegate straight to the underlying storage, supplying the matching IO-stats bucket from
 * {@link AmzaStats} where the storage call takes one. In addition this class tracks the delta WAL id the
 * partition was loaded at (see {@link #load}) and derives tombstone/TTL compaction thresholds from the
 * current {@link PartitionProperties} (see {@link #compactTombstone}).
 * <p>
 * The current properties are held in a {@code volatile} field and are snapshotted into a local before
 * use; loading is serialized by synchronizing on the load marker.
 */
public class PartitionStore implements RangeScannable {

    private static final MetricLogger LOG = MetricLoggerFactory.getLogger();

    /** Value held by {@link #loadedAtDeltaWALId} until a load has completed. */
    private static final long NOT_LOADED = Integer.MIN_VALUE;

    private final AmzaStats amzaStats;
    private final TimestampedOrderIdProvider orderIdProvider;
    private final VersionedPartitionName versionedPartitionName;
    private final WALStorage walStorage;

    /** Delta WAL id passed to the load that completed, or {@link #NOT_LOADED}. Also used as the load monitor. */
    private final AtomicLong loadedAtDeltaWALId = new AtomicLong(NOT_LOADED);

    private volatile PartitionProperties properties;

    /**
     * Creates a store for one versioned partition.
     *
     * @param amzaStats              source of the IO-stats buckets and compaction stats handed to the storage
     * @param orderIdProvider        used to translate wall-clock ages into version ids for compaction
     * @param versionedPartitionName identity of the partition, used for logging and system-partition checks
     * @param walStorage             storage every operation delegates to
     * @param properties             initial partition properties; replaceable via {@link #updateProperties}
     */
    public PartitionStore(AmzaStats amzaStats,
        TimestampedOrderIdProvider orderIdProvider,
        VersionedPartitionName versionedPartitionName,
        WALStorage walStorage,
        PartitionProperties properties) {

        this.amzaStats = amzaStats;
        this.orderIdProvider = orderIdProvider;
        this.versionedPartitionName = versionedPartitionName;
        this.properties = properties;
        this.walStorage = walStorage;
    }

    /**
     * @return the properties most recently supplied via the constructor or {@link #updateProperties}
     */
    public PartitionProperties getProperties() {
        return properties;
    }

    /**
     * @return the underlying storage this store delegates to
     */
    public WALStorage getWalStorage() {
        return walStorage;
    }

    /**
     * Loads the underlying storage once, then records {@code deltaWALId} as the id the partition was loaded at.
     * <p>
     * The work is submitted to {@code executorService} and this call blocks on the resulting future. Inside the
     * task the "already loaded" check is repeated both before and after acquiring the load monitor, so only one
     * task performs the load. When the current properties request it, a forced tombstone compaction runs
     * immediately after the storage load and before the partition is marked as loaded.
     * <p>
     * NOTE(review): the current properties are dereferenced here without a null check — confirm callers never
     * load a store that was constructed with null properties.
     *
     * @param baseKey         base location handed to the storage load and to the optional startup compaction
     * @param deltaWALId      delta WAL id being loaded against; {@code -1} disables merge-marker truncation
     * @param prevDeltaWALId  forwarded unchanged to the storage load
     * @param stripe          forwarded unchanged to the storage load and the optional startup compaction
     * @param executorService executor that runs the load task
     * @throws IllegalStateException if the up-front check finds the partition was loaded without a delta, or
     *                               at a later delta WAL id than the one requested
     * @throws Exception             whatever {@link Future#get()} raises, including failures of the load task
     */
    public void load(
        File baseKey,
        long deltaWALId,
        long prevDeltaWALId,
        int stripe,
        ExecutorService executorService) throws Exception {

        if (checkIfLoaded(deltaWALId)) {
            return;
        }

        // load as a future so that initialization cannot be interrupted
        Future<Boolean> future = executorService.submit(() -> {
            if (checkIfLoaded(deltaWALId)) {
                return true;
            }
            synchronized (loadedAtDeltaWALId) {
                // Re-check under the monitor: another task may have finished loading while we waited.
                if (checkIfLoaded(deltaWALId)) {
                    return true;
                }

                PartitionProperties stackProperties = this.properties;
                boolean backwardScan = !versionedPartitionName.getPartitionName().isSystemPartition();
                boolean truncateToEndOfMergeMarker = deltaWALId != -1 && stackProperties.replicated;

                walStorage.load(amzaStats.loadIoStats,
                    baseKey,
                    deltaWALId,
                    prevDeltaWALId,
                    backwardScan,
                    truncateToEndOfMergeMarker,
                    stackProperties.maxValueSizeInIndex,
                    stripe);

                if (stackProperties.forceCompactionOnStartup) {
                    // Compact in place (same from/to location) with a transition that does no extra work.
                    compactTombstone(
                        true,
                        baseKey,
                        baseKey,
                        stripe,
                        -1,
                        transitionToCompacted -> transitionToCompacted.tx(() -> null));
                }

                loadedAtDeltaWALId.set(deltaWALId);
                return true;
            }
        });
        future.get();
    }

    /**
     * Reports whether a load has already completed, validating the requested delta WAL id on the way.
     * <p>
     * For a non-negative {@code deltaWALId} the request is rejected if the partition was previously loaded
     * with {@code -1} or with a larger id. Otherwise any completed load counts as loaded, including one
     * performed at an earlier id.
     *
     * @param deltaWALId delta WAL id the caller wants the partition loaded at
     * @return {@code true} if a load has completed, {@code false} if none has
     * @throws IllegalStateException on the two rejected cases described above
     */
    private boolean checkIfLoaded(long deltaWALId) {
        long loaded = loadedAtDeltaWALId.get();
        if (deltaWALId > -1) {
            if (loaded == -1) {
                throw new IllegalStateException("Partition was loaded without a delta before validation. attempted:" + deltaWALId);
            }
            // NOT_LOADED is negative, so this can only trip after a real load at a larger id.
            if (deltaWALId < loaded) {
                throw new IllegalStateException("Partition was loaded out of order. attempted:" + deltaWALId + " loaded:" + loaded);
            }
        }
        return loaded != NOT_LOADED;
    }

    /**
     * @return the sick flag reported by the underlying storage
     */
    public boolean isSick() {
        return walStorage.isSick();
    }

    /**
     * Flushes the underlying storage.
     *
     * @param fsync forwarded unchanged to the storage flush
     * @throws Exception if the storage flush fails
     */
    public void flush(boolean fsync) throws Exception {
        walStorage.flush(fsync);
    }

    /**
     * Delegates a full row scan to the underlying storage.
     *
     * @param txKeyValueStream receiver of the scanned entries
     * @param hydrateValues    forwarded unchanged to the storage scan
     * @return the storage scan's result
     * @throws Exception if the storage scan fails
     */
    @Override
    public boolean rowScan(KeyValueStream txKeyValueStream, boolean hydrateValues) throws Exception {
        return walStorage.rowScan(txKeyValueStream, hydrateValues);
    }

    /**
     * Delegates a bounded scan to the underlying storage; all bounds are forwarded unchanged.
     *
     * @param fromPrefix       prefix of the lower bound
     * @param fromKey          key of the lower bound
     * @param toPrefix         prefix of the upper bound
     * @param toKey            key of the upper bound
     * @param txKeyValueStream receiver of the scanned entries
     * @param hydrateValues    forwarded unchanged to the storage scan
     * @return the storage scan's result
     * @throws Exception if the storage scan fails
     */
    @Override
    public boolean rangeScan(byte[] fromPrefix, byte[] fromKey, byte[] toPrefix, byte[] toKey, KeyValueStream txKeyValueStream,
        boolean hydrateValues) throws Exception {
        return walStorage.rangeScan(fromPrefix, fromKey, toPrefix, toKey, txKeyValueStream, hydrateValues);
    }

    /**
     * Compacts tombstoned and expired entries when forced, or when the storage reports there is something
     * old enough to be worth compacting.
     * <p>
     * For each of the four age settings in the current properties (tombstone/TTL by timestamp/version) that
     * is positive, two thresholds are derived: a "check" threshold at {@code age + interval} that decides
     * whether compaction is worthwhile, and a "compact" threshold at {@code age} that is handed to the
     * storage compaction. Thresholds stay {@code 0} for settings that are not positive or when no properties
     * are set.
     * <p>
     * This method is best-effort: any exception raised while checking or compacting is logged and not
     * propagated. If compaction would proceed but no properties are set, it is skipped with a warning, since
     * the row type and index value size limit required by the storage compaction are unavailable.
     *
     * @param force                 compact without consulting the storage's compactable check
     * @param fromBaseKey           location to compact from
     * @param toBaseKey             location to compact to; when different from {@code fromBaseKey} the
     *                              compaction is labelled as a rebalance in the stats name
     * @param stripe                forwarded unchanged to the storage compaction
     * @param disposalVersion       forwarded unchanged to the compactable check and the storage compaction
     * @param transitionToCompacted forwarded unchanged to the storage compaction
     */
    public void compactTombstone(
        boolean force,
        File fromBaseKey,
        File toBaseKey,
        int stripe,
        long disposalVersion,
        TxTransitionToCompacted transitionToCompacted) {

        // ageInMillis: 180 days
        // intervalMillis: 10 days
        // Do I have anything older than (180+10) days?
        // If so, then compact everything older than 180 days.
        long tombstoneCheckTimestamp = 0;
        long tombstoneCompactTimestamp = 0;
        long tombstoneCheckVersion = 0;
        long tombstoneCompactVersion = 0;
        long ttlCheckTimestamp = 0;
        long ttlCompactTimestamp = 0;
        long ttlCheckVersion = 0;
        long ttlCompactVersion = 0;

        // Snapshot the volatile field so every threshold below comes from the same properties instance.
        PartitionProperties stackProperties = this.properties;
        if (stackProperties != null) {
            if (stackProperties.tombstoneTimestampAgeInMillis > 0) {
                tombstoneCheckTimestamp = getTimestampId(stackProperties.tombstoneTimestampAgeInMillis + stackProperties.tombstoneTimestampIntervalMillis);
                tombstoneCompactTimestamp = getTimestampId(stackProperties.tombstoneTimestampAgeInMillis);
            }
            if (stackProperties.tombstoneVersionAgeInMillis > 0) {
                tombstoneCheckVersion = getVersion(stackProperties.tombstoneVersionAgeInMillis + stackProperties.tombstoneVersionIntervalMillis);
                tombstoneCompactVersion = getVersion(stackProperties.tombstoneVersionAgeInMillis);
            }
            if (stackProperties.ttlTimestampAgeInMillis > 0) {
                ttlCheckTimestamp = getTimestampId(stackProperties.ttlTimestampAgeInMillis + stackProperties.ttlTimestampIntervalMillis);
                ttlCompactTimestamp = getTimestampId(stackProperties.ttlTimestampAgeInMillis);
            }
            if (stackProperties.ttlVersionAgeInMillis > 0) {
                ttlCheckVersion = getVersion(stackProperties.ttlVersionAgeInMillis + stackProperties.ttlVersionIntervalMillis);
                ttlCompactVersion = getVersion(stackProperties.ttlVersionAgeInMillis);
            }
        }

        try {
            if (force || walStorage.compactableTombstone(tombstoneCheckTimestamp, tombstoneCheckVersion, ttlCheckTimestamp, ttlCheckVersion, disposalVersion)) {
                if (stackProperties == null) {
                    // The storage compaction needs rowType and maxValueSizeInIndex; without properties
                    // there is nothing sensible to pass, so skip before any compaction stats are opened.
                    LOG.warn("Skipped tombstone compaction because no properties are set for partition: {}", versionedPartitionName);
                    return;
                }

                String dir = fromBaseKey.toString();
                if (!fromBaseKey.equals(toBaseKey)) {
                    dir = " rebalance " + fromBaseKey + " to " + toBaseKey;
                }
                String name = versionedPartitionName.toString() + " " + dir + " stripe:" + stripe + " force:" + force;
                AmzaStats.CompactionStats compactionStats = amzaStats.beginCompaction(CompactionFamily.tombstone, name);
                try {
                    LOG.info("Compacting tombstoneTimestampId:{} tombstoneVersion:{} ttlTimestampId:{} ttlVersion:{} versionedPartitionName:{}",
                        tombstoneCompactTimestamp, tombstoneCompactVersion, ttlCompactTimestamp, ttlCompactVersion, versionedPartitionName);
                    boolean expectedEndOfMerge = !versionedPartitionName.getPartitionName().isSystemPartition();
                    walStorage.compactTombstone(amzaStats.compactTombstoneIoStats,
                        compactionStats,
                        fromBaseKey,
                        toBaseKey,
                        stackProperties.rowType,
                        tombstoneCompactTimestamp,
                        tombstoneCompactVersion,
                        ttlCompactTimestamp,
                        ttlCompactVersion,
                        disposalVersion,
                        stackProperties.maxValueSizeInIndex,
                        stripe,
                        expectedEndOfMerge,
                        transitionToCompacted);
                } finally {
                    // Always close the stats entry, even when the storage compaction throws.
                    compactionStats.finished();
                }
            } else {
                LOG.debug("Ignored tombstoneTimestampId:{} tombstoneVersion:{} ttlTimestampId:{} ttlVersion:{} versionedPartitionName:{}",
                    tombstoneCompactTimestamp, tombstoneCompactVersion, ttlCompactTimestamp, ttlCompactVersion, versionedPartitionName);
            }
        } catch (Exception x) {
            LOG.error("Failed to compact tombstones for partition: {}", new Object[] { versionedPartitionName }, x);
        }
    }

    /**
     * @param timeAgoInMillis age in milliseconds
     * @return the current wall-clock time in milliseconds minus {@code timeAgoInMillis}
     */
    private long getTimestampId(long timeAgoInMillis) {
        return System.currentTimeMillis() - timeAgoInMillis;
    }

    /**
     * @param timeAgoInMillis age in milliseconds
     * @return the order id provider's approximate id for the wall-clock instant {@code timeAgoInMillis} ago
     */
    private long getVersion(long timeAgoInMillis) {
        return orderIdProvider.getApproximateId(System.currentTimeMillis() - timeAgoInMillis);
    }

    /**
     * Delegates a single-key lookup to the underlying storage.
     *
     * @param prefix prefix of the key
     * @param key    key to look up
     * @return whatever the storage lookup returns for this prefix and key
     * @throws Exception if the storage lookup fails
     */
    public TimestampedValue getTimestampedValue(byte[] prefix, byte[] key) throws Exception {
        return walStorage.getTimestampedValue(prefix, key);
    }

    /**
     * Delegates a multi-key value lookup to the underlying storage.
     *
     * @param prefix prefix shared by the keys
     * @param keys   supplier of the keys to look up
     * @param stream receiver of the results
     * @return the storage call's result
     * @throws Exception if the storage call fails
     */
    public boolean streamValues(byte[] prefix, UnprefixedWALKeys keys, KeyValueStream stream) throws Exception {
        return walStorage.streamValues(prefix, keys, stream);
    }

    /**
     * Checks a single key by running the storage's multi-key containment check with just that key.
     *
     * @param prefix prefix of the key
     * @param key    key to check
     * @return the {@code contained} flag the storage reported for the key, or {@code false} if the storage
     *         never invoked the callback
     * @throws Exception if the storage call fails
     */
    public boolean containsKey(byte[] prefix, byte[] key) throws Exception {
        // Single-element holder so the callback lambda can write the answer back.
        boolean[] result = new boolean[1];
        walStorage.containsKeys(prefix, stream -> stream.stream(key), (_prefix, _key, contained, timestamp, version) -> {
            result[0] = contained;
            return true;
        });
        return result[0];
    }

    /**
     * Delegates a multi-key containment check to the underlying storage.
     *
     * @param prefix prefix shared by the keys
     * @param keys   supplier of the keys to check
     * @param stream receiver of the per-key results
     * @return the storage call's result
     * @throws Exception if the storage call fails
     */
    public boolean containsKeys(byte[] prefix, UnprefixedWALKeys keys, KeyContainedStream stream) throws Exception {
        return walStorage.containsKeys(prefix, keys, stream);
    }

    /**
     * Delegates to the storage's {@code takeRowUpdatesSince}, accounted against the take IO stats.
     *
     * @param transactionId forwarded unchanged to the storage call
     * @param rowStream     receiver of the rows
     * @return the storage call's result
     * @throws Exception if the storage call fails
     */
    public boolean takeRowUpdatesSince(long transactionId, RowStream rowStream) throws Exception {
        return walStorage.takeRowUpdatesSince(amzaStats.takeIoStats, transactionId, rowStream);
    }

    /**
     * Applies {@code updates} through the storage's {@code update} call, accounted against the merge IO stats.
     * <p>
     * The row type is taken from the supplied {@code partitionProperties}, not from this store's own
     * current properties.
     *
     * @param generateRowsChanged forwarded unchanged to the storage update
     * @param partitionProperties properties whose {@code rowType} is used for the update
     * @param forceTxId           forwarded unchanged to the storage update
     * @param prefix              prefix the updates apply under
     * @param updates             the updates to apply
     * @return the storage update's result
     * @throws Exception if the storage update fails
     */
    public RowsChanged merge(boolean generateRowsChanged,
        PartitionProperties partitionProperties,
        long forceTxId,
        byte[] prefix,
        Commitable updates) throws Exception {
        return walStorage.update(amzaStats.mergeIoStats, generateRowsChanged, partitionProperties.rowType, forceTxId, true, prefix, updates);
    }

    /**
     * Replaces the current properties and then notifies the underlying storage of the change.
     *
     * @param properties the new properties
     * @throws Exception if the storage notification fails; the new properties remain set on this store
     */
    public void updateProperties(PartitionProperties properties) throws Exception {
        this.properties = properties;
        walStorage.updatedProperties(properties);
    }

    /**
     * @return the highest transaction id reported by the underlying storage
     */
    public long highestTxId() {
        return walStorage.highestTxId();
    }

    /**
     * @return the merged transaction id reported by the underlying storage
     */
    public long mergedTxId() {
        return walStorage.mergedTxId();
    }

    /**
     * Delegates deletion to the underlying storage.
     *
     * @param baseKey location forwarded unchanged to the storage delete
     * @throws Exception if the storage delete fails
     */
    public void delete(File baseKey) throws Exception {
        walStorage.delete(baseKey);
    }
}