/*
* Copyright (c) 2008 - 2011, Jan Stender, Bjoern Kolbeck, Mikael Hoegqvist,
* Felix Hupfeld, Zuse Institute Berlin
*
* Licensed under the BSD License, see LICENSE file for details.
*
*/
package de.mxro.thrd.babudb05.lsmdb;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.nio.channels.ClosedByInterruptException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import de.mxro.thrd.babudb05.api.dev.BabuDBInternal;
import de.mxro.thrd.babudb05.api.dev.CheckpointerInternal;
import de.mxro.thrd.babudb05.api.dev.DatabaseInternal;
import de.mxro.thrd.babudb05.api.dev.SnapshotManagerInternal;
import de.mxro.thrd.babudb05.api.exception.BabuDBException;
import de.mxro.thrd.babudb05.api.exception.BabuDBException.ErrorCode;
import de.mxro.thrd.babudb05.log.DiskLogger;
import de.mxro.thrd.babudb05.snapshots.SnapshotConfig;
import de.mxro.thrd.xstreemfs.foundation.logging.Logging;
import de.mxro.thrd.xstreemfs.foundation.util.OutputUtils;
/**
* This thread regularly checks the size of the database operations log and
* initiates a checkpoint of all databases if necessary.
*
* @author bjko
*/
public class CheckpointerImpl extends CheckpointerInternal {
private final static class MaterializationRequest {
MaterializationRequest(String dbName, int[] snapIDs, SnapshotConfig snap) {
this.dbName = dbName;
this.snapIDs = snapIDs;
this.snap = snap;
}
String dbName;
int[] snapIDs;
SnapshotConfig snap;
}
private static final String RUNTIME_STATE_CPCOUNT = "checkpointer.cpCount";
private static final String RUNTIME_STATE_LASTCP = "checkpointer.lastCpTimestampMillis";
private static final String RUNTIME_STATE_LASTCPDURATION = "checkpointer.lastCpDurationMillis";
private volatile boolean quit;
private final AtomicBoolean suspended = new AtomicBoolean(false);
private final Object suspensionLock = new Object();
private DiskLogger logger;
private long checkInterval;
/**
* Maximum file size of operations log in bytes.
*/
private long maxLogLength;
private final BabuDBInternal dbs;
/**
* a queue containing all snapshot materialization requests that should be
* executed before the next checkpoint is made
*/
private final List<MaterializationRequest> requests = new LinkedList<MaterializationRequest>();
/**
* indicates whether the next checkpoint has been triggered manually or
* automatically
*/
private boolean forceCheckpoint;
/**
* indicates when the current checkpoint is complete and is also a lock
*/
private AtomicBoolean checkpointComplete = new AtomicBoolean(true);
/**
* Flag to notify the disk-logger about a viewId incrementation.
*/
private boolean incrementViewId = false;
private volatile LSN lastWrittenLSN;
private AtomicInteger _checkpointCount = new AtomicInteger();
private AtomicLong _lastCheckpoint = new AtomicLong();
private AtomicLong _lastCheckpointDuration = new AtomicLong();
/**
* Creates a new database checkpointer
*
* @param master
* the database
*/
public CheckpointerImpl(BabuDBInternal master) {
setLifeCycleListener(master);
this.dbs = master;
}
@Override
public void init(DiskLogger logger, int checkInterval, long maxLogLength) throws BabuDBException {
this.logger = logger;
this.checkInterval = 1000L * checkInterval;
this.maxLogLength = maxLogLength;
if (!suspended.compareAndSet(true, false) && !quit) {
start();
try {
waitForStartup();
} catch (Exception e) {
throw new BabuDBException(ErrorCode.INTERNAL_ERROR, e.getMessage(), e);
}
} else {
synchronized (suspensionLock) {
suspensionLock.notify();
}
}
}
/*
* (non-Javadoc)
*
* @see
* org.xtreemfs.babudb.api.dev.CheckpointerInternal#suspendCheckpointing()
*/
@Override
public void suspendCheckpointing() throws InterruptedException {
synchronized (suspended) {
if (!suspended.compareAndSet(false, true)) {
synchronized (this) {
notify();
}
suspended.wait();
}
}
}
/*
* (non-Javadoc)
*
* @see org.xtreemfs.babudb.api.dev.CheckpointerInternal#checkpoint(boolean)
*/
@Override
public LSN checkpoint(boolean incViewId) throws BabuDBException {
synchronized (checkpointComplete) {
checkpointComplete.set(false);
}
// notify the checkpointing thread to immediately process all requests
// in the processing queue
synchronized (this) {
incrementViewId = incViewId;
forceCheckpoint = true;
notify();
}
// wait for the checkpoint to complete
try {
waitForCheckpoint();
return lastWrittenLSN;
} catch (InterruptedException e) {
throw new BabuDBException(ErrorCode.INTERNAL_ERROR, "interrupted", e);
}
}
/*
* (non-Javadoc)
*
* @see org.xtreemfs.babudb.lsmdb.Checkpointer#checkpoint()
*/
@Override
public void checkpoint() throws BabuDBException, InterruptedException {
checkpoint(false);
}
/**
* Materialize all snapshots in the queue before taking a checkpoint.
*
* @throws BabuDBException
* if materialization failed.
*/
protected void materializeSnapshots() throws BabuDBException {
for (;;) {
MaterializationRequest rq = null;
synchronized (requests) {
if (requests.size() > 0)
rq = requests.remove(0);
}
if (rq == null)
break;
if (Logging.isDebug())
Logging.logMessage(Logging.LEVEL_DEBUG, this, "snapshot materialization request found for database '"
+ rq.dbName + "', snapshot: '" + rq.snap.getName() + "'");
SnapshotManagerInternal snapMan = dbs.getSnapshotManager();
// write the snapshot
dbs.getDatabaseManager().getDatabase(rq.dbName)
.proceedWriteSnapshot(rq.snapIDs, snapMan.getSnapshotDir(rq.dbName, rq.snap.getName()), rq.snap);
// notify the snapshot manager about the completion
// of the snapshot
snapMan.snapshotComplete(rq.dbName, rq.snap);
Logging.logMessage(Logging.LEVEL_DEBUG, this, "snapshot materialization complete");
}
}
/**
* Internal method for creating a new database checkpoint. This involves the
* following steps:
* <ol>
* <li>snapshot all indices of all databases
* <li>create new log file for subsequent insertions
* <li>write index snapshots to new on-disk index files
* <li>link new on-disk files to index structures
* <li>delete any obsolete on-disk files
* <li>delete any obsolete log files
* </ol>
* The first two steps need to be sync'ed with new insertions but should be
* very fast. The following steps are performed in a fully asynchronous
* manner.
*
* @throws BabuDBException
* @throws InterruptedException
*/
protected void createCheckpoint() throws BabuDBException, InterruptedException {
Logging.logMessage(Logging.LEVEL_INFO, this, "initiating database checkpoint...");
Collection<DatabaseInternal> databases = dbs.getDatabaseManager().getDatabaseList();
try {
int[][] snapIds = new int[databases.size()][];
int i = 0;
try {
// critical block...
logger.lock();
for (DatabaseInternal db : databases) {
snapIds[i++] = db.proceedCreateSnapshot();
}
lastWrittenLSN = logger.switchLogFile(incrementViewId);
incrementViewId = false;
} finally {
if (logger.hasLock())
logger.unlock();
}
i = 0;
for (DatabaseInternal db : databases) {
db.proceedWriteSnapshot(lastWrittenLSN.getViewId(), lastWrittenLSN.getSequenceNo(), snapIds[i++]);
db.proceedCleanupSnapshot(lastWrittenLSN.getViewId(), lastWrittenLSN.getSequenceNo());
}
// delete all logfile with LSN <= lastWrittenLSN
File f = new File(dbs.getConfig().getDbLogDir());
String[] logs = f.list(new FilenameFilter() {
public boolean accept(File dir, String name) {
return name.endsWith(".dbl");
}
});
if (logs != null) {
Pattern p = Pattern.compile("(\\d+)\\.(\\d+)\\.dbl");
for (String log : logs) {
Matcher m = p.matcher(log);
m.matches();
String tmp = m.group(1);
int viewId = Integer.valueOf(tmp);
tmp = m.group(2);
int seqNo = Integer.valueOf(tmp);
LSN logLSN = new LSN(viewId, seqNo);
if (logLSN.compareTo(lastWrittenLSN) <= 0) {
Logging.logMessage(Logging.LEVEL_DEBUG, this, "deleting old db log file: " + log);
f = new File(dbs.getConfig().getDbLogDir() + log);
if (!f.delete())
Logging.logMessage(Logging.LEVEL_WARN, this, "could not delete log file: %s",
f.getAbsolutePath());
}
}
}
} catch (IOException ex) {
throw new BabuDBException(ErrorCode.IO_ERROR, "cannot create checkpoint", ex);
}
Logging.logMessage(Logging.LEVEL_INFO, this, "checkpoint complete");
}
/*
* (non-Javadoc)
*
* @see org.xtreemfs.babudb.api.dev.CheckpointerInternal#
* addSnapshotMaterializationRequest( java.lang.String, int[],
* org.xtreemfs.babudb.snapshots.SnapshotConfig)
*/
@Override
public void addSnapshotMaterializationRequest(String dbName, int[] snapIds, SnapshotConfig snap) {
synchronized (requests) {
requests.add(new MaterializationRequest(dbName, snapIds, snap));
}
}
/*
* (non-Javadoc)
*
* @see org.xtreemfs.babudb.api.dev.CheckpointerInternal#
* removeSnapshotMaterializationRequest( java.lang.String, java.lang.String)
*/
@Override
public void removeSnapshotMaterializationRequest(String dbName, String snapshotName) {
synchronized (requests) {
Iterator<MaterializationRequest> it = requests.iterator();
while (it.hasNext()) {
MaterializationRequest rq = it.next();
if (snapshotName.equals(rq.snap.getName())) {
requests.remove(rq);
break;
}
}
}
}
@Override
public synchronized void shutdown() {
quit = true;
interrupt();
}
public void run() {
Logging.logMessage(Logging.LEVEL_DEBUG, this, "operational");
boolean manualCheckpoint = false;
notifyStarted();
while (!quit) {
try {
synchronized (this) {
if (!forceCheckpoint) {
wait(checkInterval);
}
manualCheckpoint = forceCheckpoint;
forceCheckpoint = false;
}
// this block allows to suspend the Checkpointer from taking
// checkpoints until it has been re-init()
synchronized (suspended) {
if (suspended.get()) {
// lock
suspended.notify();
synchronized (suspensionLock) {
suspensionLock.wait();
}
continue;
}
}
final long lfsize = logger.getLogFileSize();
if (manualCheckpoint || lfsize > maxLogLength) {
if (!manualCheckpoint) {
Logging.logMessage(Logging.LEVEL_INFO, this, "database operation log has exceeded threshold "
+ "size of " + maxLogLength + " (" + lfsize + ")");
} else {
Logging.logMessage(Logging.LEVEL_INFO, this, "triggered manual checkpoint");
}
synchronized (dbs.getDatabaseManager().getDBModificationLock()) {
synchronized (this) {
long start = System.currentTimeMillis();
materializeSnapshots();
createCheckpoint();
// update statistics
_checkpointCount.incrementAndGet();
_lastCheckpoint.set(System.currentTimeMillis());
_lastCheckpointDuration.set(System.currentTimeMillis() - start);
}
}
}
} catch (InterruptedException ex) {
if (quit)
break;
else
Logging.logMessage(Logging.LEVEL_DEBUG, this, "CHECKPOINT WAS ABORTED!");
} catch (Throwable ex) {
if (ex instanceof BabuDBException
&& ((BabuDBException) ex).getCause() instanceof ClosedByInterruptException) {
Logging.logMessage(Logging.LEVEL_DEBUG, this, "CHECKPOINT WAS ABORTED!");
} else {
Logging.logMessage(Logging.LEVEL_ERROR, this, "DATABASE CHECKPOINT CREATION FAILURE!");
Logging.logMessage(Logging.LEVEL_ERROR, this, OutputUtils.stackTraceToString(ex));
}
} finally {
synchronized (checkpointComplete) {
checkpointComplete.set(true);
checkpointComplete.notify();
}
}
}
Logging.logMessage(Logging.LEVEL_DEBUG, this, "checkpointer shut down " + "successfully");
notifyStopped();
}
/*
* (non-Javadoc)
*
* @see org.xtreemfs.babudb.api.Checkpointer#waitForCheckpoint()
*/
@Override
public void waitForCheckpoint() throws InterruptedException {
synchronized (checkpointComplete) {
while (!checkpointComplete.get()) {
checkpointComplete.wait();
}
}
}
@Override
public Object getRuntimeState(String property) {
if (RUNTIME_STATE_CPCOUNT.equals(property))
return _checkpointCount.get();
if (RUNTIME_STATE_LASTCP.equals(property))
return _lastCheckpoint.get();
if (RUNTIME_STATE_LASTCPDURATION.equals(property))
return _lastCheckpointDuration.get();
return null;
}
@Override
public Map<String, Object> getRuntimeState() {
Map<String, Object> map = new HashMap<String, Object>();
map.put(RUNTIME_STATE_CPCOUNT, _checkpointCount.get());
map.put(RUNTIME_STATE_LASTCP, _lastCheckpoint.get());
map.put(RUNTIME_STATE_LASTCPDURATION, _lastCheckpointDuration.get());
return map;
}
}