package org.infinispan.persistence.sifs;
import java.io.IOException;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.infinispan.commons.marshall.Marshaller;
import org.infinispan.util.TimeService;
import org.infinispan.util.logging.Log;
import org.infinispan.util.logging.LogFactory;
/**
* Component that keeps track of log file usage - as soon as enough entries in a file are
* overwritten that the file becomes cluttered with stale records, the remaining valid
* records are moved to another file and the old ones are dropped.
* Expired records are moved as tombstones without values (records of entry removal).
*
* @author Radim Vansa &lt;rvansa@redhat.com&gt;
*/
class Compactor extends Thread {
private static final Log log = LogFactory.getLog(Compactor.class);
private static final boolean trace = log.isTraceEnabled();
private final ConcurrentMap<Integer, Stats> fileStats = new ConcurrentHashMap<Integer, Stats>();
private final BlockingQueue<Integer> scheduledCompaction = new LinkedBlockingQueue<Integer>();
private final BlockingQueue<IndexRequest> indexQueue;
private final FileProvider fileProvider;
private final TemporaryTable temporaryTable;
private final Marshaller marshaller;
private final TimeService timeService;
private final int maxFileSize;
private final double compactionThreshold;
private Index index;
// since processing a single scheduled compaction takes a long time, we don't use the queue to signal clear/terminate
private volatile boolean clearSignal = false;
private volatile boolean terminateSignal = false;
private volatile CountDownLatch compactorResume;
private volatile CountDownLatch compactorStop;
public Compactor(FileProvider fileProvider,
TemporaryTable temporaryTable,
BlockingQueue<IndexRequest> indexQueue,
Marshaller marshaller, TimeService timeService, int maxFileSize, double compactionThreshold) {
super("BCS-Compactor");
this.fileProvider = fileProvider;
this.temporaryTable = temporaryTable;
this.indexQueue = indexQueue;
this.marshaller = marshaller;
this.timeService = timeService;
this.maxFileSize = maxFileSize;
this.compactionThreshold = compactionThreshold;
this.start();
}
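/* A minimal wiring sketch - the collaborator instances and the size/threshold values
below are illustrative assumptions based on this constructor, not the store's actual
bootstrap code:

  BlockingQueue<IndexRequest> indexQueue = new LinkedBlockingQueue<>();
  Compactor compactor = new Compactor(fileProvider, temporaryTable, indexQueue,
        marshaller, timeService, 16 * 1024 * 1024, 0.5);
  compactor.setIndex(index); // the index must be set before compaction can run
  // ... the store runs; free() and completeFile() feed the statistics ...
  compactor.stopOperations(); // terminates the compactor thread on shutdown
*/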
public void setIndex(Index index) {
this.index = index;
}
public void releaseStats(int file) {
fileStats.remove(file);
}
public void free(int file, int size) {
// entries expired from a compacted file are reported with file = -1
if (file < 0) return;
recordFreeSpace(getStats(file), file, size);
}
public void completeFile(int file) {
Stats stats = getStats(file);
stats.setCompleted();
if (stats.readyToBeScheduled(compactionThreshold, stats.getFree())) {
schedule(file, stats);
}
}
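// Stats entries are created lazily; a negative total means the file size was not known
// yet (presumably because the file was still being written) and is resolved later.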
private Stats getStats(int file) {
Stats stats = fileStats.get(file);
if (stats == null) {
int fileSize = (int) fileProvider.getFileSize(file);
stats = new Stats(fileSize, 0);
Stats other = fileStats.putIfAbsent(file, stats);
if (other != null) {
if (fileSize > other.getTotal()) {
other.setTotal(fileSize);
}
return other;
}
}
if (stats.getTotal() < 0) {
int fileSize = (int) fileProvider.getFileSize(file);
if (fileSize >= 0) {
stats.setTotal(fileSize);
}
}
return stats;
}
private void recordFreeSpace(Stats stats, int file, int size) {
if (stats.addFree(size, compactionThreshold)) {
schedule(file, stats);
}
}
private void schedule(int file, Stats stats) {
try {
synchronized (stats) {
if (!stats.isScheduled()) {
log.debugf("Scheduling file %d for compaction: %d/%d free", file, stats.free.get(), stats.total);
stats.setScheduled();
scheduledCompaction.put(file);
}
}
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
@Override
public void run() {
try {
FileProvider.Log logFile = null;
int currentOffset = 0;
for (;;) {
Integer scheduledFile = null;
try {
scheduledFile = scheduledCompaction.poll(1, TimeUnit.MINUTES);
} catch (InterruptedException e) {
// ignored: termination and clear requests are detected via the volatile flags checked below
}
if (terminateSignal) {
if (logFile != null) {
logFile.close();
completeFile(logFile.fileId);
}
break;
}
if (clearSignal) {
pauseCompactor(logFile);
logFile = null;
continue;
}
if (scheduledFile == null) {
if (logFile != null) {
logFile.close();
completeFile(logFile.fileId);
logFile = null;
}
continue;
}
log.debugf("Compacting file %d", scheduledFile);
int scheduledOffset = 0;
FileProvider.Handle handle = fileProvider.getFile(scheduledFile);
if (handle == null) {
throw new IllegalStateException("Compactor should not get deleted file for compaction!");
}
try {
while (!clearSignal && !terminateSignal) {
EntryHeader header = EntryRecord.readEntryHeader(handle, scheduledOffset);
if (header == null) {
break;
}
byte[] serializedKey = EntryRecord.readKey(handle, header, scheduledOffset);
Object key = marshaller.objectFromByteBuffer(serializedKey);
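// tombstones (records without a value) are identified by the bitwise complement of
// their offset, so a negative indexed offset marks a removal record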
int indexedOffset = header.valueLength() > 0 ? scheduledOffset : ~scheduledOffset;
boolean drop = true;
boolean truncate = false;
EntryPosition entry = temporaryTable.get(key);
if (entry != null) {
synchronized (entry) {
if (trace) {
log.tracef("Key for %d:%d was found in temporary table on %d:%d",
scheduledFile, scheduledOffset, entry.file, entry.offset);
}
if (entry.file == scheduledFile && entry.offset == indexedOffset) {
// It's quite unlikely that we would compact a record that is not indexed yet,
// but let's handle that
if (header.expiryTime() >= 0 && header.expiryTime() <= timeService.wallClockTime()) {
truncate = true;
}
} else {
truncate = true;
}
}
// When we have found the entry in the temporary table, the delete operation that was
// recorded there may arrive at the index only after our DROPPED request - in that case
// the index could remove the entry and the delete would then not find it
drop = false;
} else {
EntryInfo info = index.getInfo(key, serializedKey);
assert info != null : String.format("Index does not recognize entry on %d:%d", scheduledFile, scheduledOffset);
assert info.numRecords > 0;
if (info.file == scheduledFile && info.offset == scheduledOffset) {
assert header.valueLength() > 0;
// live record with data
truncate = header.expiryTime() >= 0 && header.expiryTime() <= timeService.wallClockTime();
if (trace) {
log.tracef("Is %d:%d expired? %s, numRecords? %d", scheduledFile, scheduledOffset, truncate, info.numRecords);
}
// drop only when the record is expired and it has a single record
if (!truncate || info.numRecords > 1) {
drop = false;
}
} else if (info.file == scheduledFile && info.offset == ~scheduledOffset && info.numRecords > 1) {
// this is just a tombstone, but there are more non-compacted records for this key, so we have to keep it
drop = false;
} else if (trace) {
log.tracef("Key for %d:%d was found in index on %d:%d, %d record => drop",
scheduledFile, scheduledOffset, info.file, info.offset, info.numRecords);
}
}
if (drop) {
if (trace) {
log.tracef("Drop %d:%d (%s)", scheduledFile, (Object)scheduledOffset,
header.valueLength() > 0 ? "record" : "tombstone");
}
indexQueue.put(IndexRequest.dropped(key, serializedKey, scheduledFile, scheduledOffset));
} else {
if (logFile == null || currentOffset + header.totalLength() > maxFileSize) {
if (logFile != null) {
logFile.close();
completeFile(logFile.fileId);
}
currentOffset = 0;
logFile = fileProvider.getFileForLog();
log.debugf("Compacting to %d", (Object) logFile.fileId);
}
byte[] serializedValue = null;
byte[] serializedMetadata = null;
int entryOffset;
int writtenLength;
if (header.valueLength() > 0 && !truncate) {
if (header.metadataLength() > 0) {
serializedMetadata = EntryRecord.readMetadata(handle, header, scheduledOffset);
}
serializedValue = EntryRecord.readValue(handle, header, scheduledOffset);
entryOffset = currentOffset;
writtenLength = header.totalLength();
} else {
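// the record is expired or a tombstone: rewrite only the header and key, and mark the
// new offset as a tombstone using the bitwise complement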
entryOffset = ~currentOffset;
writtenLength = EntryHeader.HEADER_SIZE + header.keyLength();
}
EntryRecord.writeEntry(logFile.fileChannel, serializedKey, serializedMetadata, serializedValue, header.seqId(), header.expiryTime());
TemporaryTable.LockedEntry lockedEntry = temporaryTable.replaceOrLock(key, logFile.fileId, entryOffset, scheduledFile, indexedOffset);
if (lockedEntry == null) {
if (trace) {
log.trace("Found entry in temporary table");
}
} else {
boolean update = false;
try {
EntryInfo info = index.getInfo(key, serializedKey);
if (info == null) {
throw new IllegalStateException(String.format(
"%s was not found in index but it was not in temporary table and there's entry on %d:%d", key, scheduledFile, indexedOffset));
} else {
update = info.file == scheduledFile && info.offset == indexedOffset;
}
if (trace) {
log.tracef("In index the key is on %d:%d (%s)", info.file, info.offset, String.valueOf(update));
}
} finally {
if (update) {
temporaryTable.updateAndUnlock(lockedEntry, logFile.fileId, entryOffset);
} else {
temporaryTable.removeAndUnlock(lockedEntry, key);
}
}
}
if (trace) {
log.tracef("Update %d:%d -> %d:%d | %d,%d", scheduledFile, indexedOffset,
logFile.fileId, entryOffset, logFile.fileChannel.position(), logFile.fileChannel.size());
}
// the previous location (scheduledFile) must be reported as well, because free space statistics are kept per file
indexQueue.put(IndexRequest.moved(key, serializedKey, logFile.fileId, entryOffset, writtenLength,
scheduledFile, indexedOffset));
currentOffset += writtenLength;
}
scheduledOffset += header.totalLength();
}
} finally {
handle.close();
}
if (terminateSignal) {
if (logFile != null) {
logFile.close();
completeFile(logFile.fileId);
}
return;
} else if (clearSignal) {
pauseCompactor(logFile);
logFile = null;
} else {
// The deletion must be executed only after the index is fully updated.
log.debugf("Finished compacting %d, scheduling delete", scheduledFile);
indexQueue.put(IndexRequest.deleteFileRequest(scheduledFile));
}
}
} catch (Exception e) {
log.error("Compactor failed.", e);
}
}
private void pauseCompactor(FileProvider.Log logFile) throws IOException, InterruptedException {
if (logFile != null) {
logFile.close();
completeFile(logFile.fileId);
}
compactorStop.countDown();
compactorResume.await();
}
public void clearAndPause() throws InterruptedException {
compactorResume = new CountDownLatch(1);
compactorStop = new CountDownLatch(1);
clearSignal = true;
scheduledCompaction.put(-1); // sentinel to wake up the compactor thread
compactorStop.await();
scheduledCompaction.clear();
fileStats.clear();
}
public void resumeAfterPause() {
clearSignal = false;
compactorResume.countDown();
}
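/* Pause/resume handshake as seen from the caller - a sketch only; the work done while
the compactor is parked is store-specific and elided here:

  compactor.clearAndPause();   // blocks until the compactor parks in pauseCompactor()
  try {
     // e.g. truncate data files and reset the index
  } finally {
     compactor.resumeAfterPause();
  }
*/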
public void stopOperations() throws InterruptedException {
terminateSignal = true;
scheduledCompaction.put(-1); // sentinel to wake up the compactor thread
this.join();
}
private static class Stats {
private final AtomicInteger free;
private volatile int total;
/* A file is not 'completed' while we have not yet loaded it completely (on startup).
Files created by the log appender/compactor are completed as soon as they are closed.
A file cannot be scheduled for compaction until it is completed.
*/
private volatile boolean completed = false;
private volatile boolean scheduled = false;
private Stats(int total, int free) {
this.free = new AtomicInteger(free);
this.total = total;
}
public int getTotal() {
return total;
}
public void setTotal(int total) {
this.total = total;
}
public boolean addFree(int size, double compactionThreshold) {
int free = this.free.addAndGet(size);
return readyToBeScheduled(compactionThreshold, free);
}
public int getFree() {
return free.get();
}
public boolean readyToBeScheduled(double compactionThreshold, int free) {
int total = this.total;
return completed && !scheduled && total >= 0 && free > total * compactionThreshold;
}
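// e.g. with total = 1000 bytes and compactionThreshold = 0.5, a completed, not yet
// scheduled file becomes eligible for compaction once more than 500 bytes are free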
public boolean isScheduled() {
return scheduled;
}
public void setScheduled() {
scheduled = true;
}
public boolean isCompleted() {
return completed;
}
public void setCompleted() {
this.completed = true;
}
}
}