/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-04 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id$
*/
package org.exist.storage.journal;
import java.io.*;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.text.DateFormat;
import java.util.Optional;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.exist.EXistException;
import org.exist.config.annotation.ConfigurationClass;
import org.exist.config.annotation.ConfigurationFieldAsAttribute;
import org.exist.storage.BrokerPool;
import org.exist.storage.lock.FileLock;
import org.exist.storage.txn.Checkpoint;
import org.exist.util.FileUtils;
import org.exist.util.ReadOnlyException;
import org.exist.util.sanity.SanityCheck;
/**
* Manages the journalling log. The database uses one central journal for
* all data files. If the journal exceeds the predefined maximum size, a new file is created.
* Every journal file has a unique number, which keeps growing during the lifetime of the db.
* The name of the file corresponds to the file number. The file with the highest
* number will be used for recovery.
*
* A buffer is used to temporarily buffer journal entries. To guarantee consistency, the buffer will be flushed
* and the journal is synched after every commit or whenever a db page is written to disk.
*
* Each entry has the structure:
*
* <pre>[byte: entryType, long: transactionId, short length, byte[] data, short backLink]</pre>
*
* <ul>
* <li>entryType is a unique id that identifies the log record. Entry types are registered via the
* {@link org.exist.storage.journal.LogEntryTypes} class.</li>
* <li>transactionId: the id of the transaction that created the record.</li>
* <li>length: the length of the log entry data.</li>
* <li>data: the payload data provided by the {@link org.exist.storage.journal.Loggable} object.</li>
* <li>backLink: offset to the start of the record. Used when scanning the log file backwards.</li>
* </ul>
*
* @author wolf
*/
@ConfigurationClass("journal")
//TODO: conf.xml refactoring <recovery> => <recovery><journal/></recovery>
public class Journal {
/**
* Logger for this class
*/
private static final Logger LOG = LogManager.getLogger(Journal.class);
public final static String RECOVERY_SYNC_ON_COMMIT_ATTRIBUTE = "sync-on-commit";
public final static String RECOVERY_JOURNAL_DIR_ATTRIBUTE = "journal-dir";
public final static String RECOVERY_SIZE_LIMIT_ATTRIBUTE = "size";
public final static String PROPERTY_RECOVERY_SIZE_LIMIT = "db-connection.recovery.size-limit";
public final static String PROPERTY_RECOVERY_JOURNAL_DIR = "db-connection.recovery.journal-dir";
public final static String PROPERTY_RECOVERY_SYNC_ON_COMMIT = "db-connection.recovery.sync-on-commit";
public final static String LOG_FILE_SUFFIX = "log";
public final static String BAK_FILE_SUFFIX = ".bak";
public final static String LCK_FILE = "journal.lck";
/** the length of the header of each entry: entryType + transactionId + length */
public final static int LOG_ENTRY_HEADER_LEN = 11;
/** header length + trailing back link */
public final static int LOG_ENTRY_BASE_LEN = LOG_ENTRY_HEADER_LEN + 2;
/** default maximum journal size */
public final static int DEFAULT_MAX_SIZE = 10; //MB
/** minimal size the journal needs to have to be replaced by a new file during a checkpoint */
private static final long MIN_REPLACE = 1024 * 1024;
/**
* size limit for the journal file. A checkpoint will be triggered if the file
* exceeds this size limit.
*/
@ConfigurationFieldAsAttribute("size")
//TODO: conf.xml refactoring <recovery size=""> => <journal size="">
private final int journalSizeLimit;
/** the current output channel
* Only valid after switchFiles() was called at least once! */
private FileOutputStream os;
private FileChannel channel;
/** Synching the journal is done by a background thread */
private final FileSyncThread syncThread;
/** latch used to synchronize writes to the channel */
private final Object latch = new Object();
/** the data directory where journal files are written to */
@ConfigurationFieldAsAttribute("journal-dir")
//TODO: conf.xml refactoring <recovery journal-dir=""> => <journal dir="">
private final Path dir;
private FileLock fileLock;
/** the current file number */
private int currentFile = 0;
/** used to keep track of the current position in the file */
private int inFilePos = 0;
/** temp buffer */
private ByteBuffer currentBuffer;
/** the last LSN written by the JournalManager */
private long currentLsn = Lsn.LSN_INVALID;
/** the last LSN actually written to the file */
private long lastLsnWritten = Lsn.LSN_INVALID;
/** stores the current LSN of the last file sync on the file */
private long lastSyncLsn = Lsn.LSN_INVALID;
/** set to true while recovery is in progress */
private boolean inRecovery = false;
/** the {@link BrokerPool} that created this manager */
private final BrokerPool pool;
/** if set to true, a sync will be triggered on the log file after every commit */
@ConfigurationFieldAsAttribute("sync-on-commit")
//TODO: conf.xml refactoring <recovery sync-on-commit=""> => <journal sync-on-commit="">
private final static boolean DEFAULT_SYNC_ON_COMMIT = true;
private final boolean syncOnCommit;
private final Path fsJournalDir;
public Journal(final BrokerPool pool, final Path directory) throws EXistException {
this.pool = pool;
this.fsJournalDir = directory.resolve("fs.journal");
// we use a 1 megabyte buffer:
currentBuffer = ByteBuffer.allocateDirect(1024 * 1024);
syncThread = new FileSyncThread(latch);
syncThread.start();
this.syncOnCommit = pool.getConfiguration().getProperty(PROPERTY_RECOVERY_SYNC_ON_COMMIT, DEFAULT_SYNC_ON_COMMIT);
if (LOG.isDebugEnabled()) {
LOG.debug("SyncOnCommit = " + syncOnCommit);
}
final Optional<Path> logDir = Optional.ofNullable((Path) pool.getConfiguration().getProperty(PROPERTY_RECOVERY_JOURNAL_DIR));
if (logDir.isPresent()) {
Path f = logDir.get();
if (!f.isAbsolute()) {
f = pool.getConfiguration().getExistHome()
.map(h -> Optional.of(h.resolve(logDir.get())))
.orElse(pool.getConfiguration().getConfigFilePath().map(p -> p.getParent().resolve(logDir.get())))
.orElse(f);
}
if (!Files.exists(f)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Output directory for journal files does not exist. Creating " + f.toAbsolutePath().toString());
}
try {
Files.createDirectories(f);
} catch (final IOException | SecurityException e) {
throw new EXistException("Failed to create output directory: " + f.toAbsolutePath().toString());
}
}
if (!Files.isWritable(f)) {
throw new EXistException("Cannot write to journal output directory: " + f.toAbsolutePath().toString());
}
this.dir = f;
} else {
this.dir = directory;
}
if (LOG.isDebugEnabled()) {
LOG.debug("Using directory for the journal: " + dir.toAbsolutePath().toString());
}
this.journalSizeLimit = 1024 * 1024 * pool.getConfiguration().getProperty(PROPERTY_RECOVERY_SIZE_LIMIT, DEFAULT_MAX_SIZE);
}
public void initialize() throws EXistException, ReadOnlyException {
final Path lck = dir.resolve(LCK_FILE);
fileLock = new FileLock(pool, lck);
final boolean locked = fileLock.tryLock();
if (!locked) {
final String lastHeartbeat =
DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.MEDIUM)
.format(fileLock.getLastHeartbeat());
throw new EXistException("The journal log directory seems to be locked by another " +
"eXist process. A lock file: " + lck.toAbsolutePath().toString() + " is present in the " +
"log directory. Last access to the lock file: " + lastHeartbeat);
}
}
/**
* Write a log entry to the journalling log.
*
* @param loggable
* @throws JournalException
*/
public synchronized void writeToLog(final Loggable loggable) throws JournalException {
if (currentBuffer == null) {
throw new JournalException("Database is shut down.");
}
SanityCheck.ASSERT(!inRecovery, "Write to log during recovery. Should not happen!");
final int size = loggable.getLogSize();
final int required = size + LOG_ENTRY_BASE_LEN;
if (required > currentBuffer.remaining()) {
flushToLog(false);
}
currentLsn = Lsn.create(currentFile, inFilePos + currentBuffer.position() + 1);
loggable.setLsn(currentLsn);
try {
currentBuffer.put(loggable.getLogType());
currentBuffer.putLong(loggable.getTransactionId());
currentBuffer.putShort((short) loggable.getLogSize());
loggable.write(currentBuffer);
currentBuffer.putShort((short) (size + LOG_ENTRY_HEADER_LEN));
} catch (final BufferOverflowException e) {
throw new JournalException("Buffer overflow while writing log record: " + loggable.dump(), e);
}
pool.getTransactionManager().trackOperation(loggable.getTransactionId());
}
/**
* Returns the last LSN physically written to the journal.
*
* @return last written LSN
*/
public long lastWrittenLsn() {
return lastLsnWritten;
}
/**
* Flush the current buffer to disk. If fsync is true, a sync will
* be called on the file to force all changes to disk.
*
* @param fsync forces all changes to disk if true and syncMode is set to SYNC_ON_COMMIT.
*/
public void flushToLog(final boolean fsync) {
flushToLog(fsync, false);
}
/**
* Flush the current buffer to disk. If fsync is true, a sync will
* be called on the file to force all changes to disk.
*
* @param fsync forces all changes to disk if true and syncMode is set to SYNC_ON_COMMIT.
* @param forceSync force changes to disk even if syncMode doesn't require it.
*/
public synchronized void flushToLog(final boolean fsync, final boolean forceSync) {
if (inRecovery) {
return;
}
flushBuffer();
if (forceSync || (fsync && syncOnCommit && currentLsn > lastSyncLsn)) {
syncThread.triggerSync();
lastSyncLsn = currentLsn;
}
try {
if (channel != null && channel.size() >= journalSizeLimit) {
pool.triggerCheckpoint();
}
} catch (final IOException e) {
LOG.warn("Failed to trigger checkpoint!", e);
}
}
/**
*
*/
private void flushBuffer() {
if (currentBuffer == null || channel == null) {
return; // the db has probably been shut down already or not fully initialized
}
synchronized (latch) {
try {
if (currentBuffer.position() > 0) {
currentBuffer.flip();
final int size = currentBuffer.remaining();
while (currentBuffer.hasRemaining()) {
channel.write(currentBuffer);
}
inFilePos += size;
lastLsnWritten = currentLsn;
}
} catch (final IOException e) {
LOG.warn("Flushing log file failed!", e);
} finally {
currentBuffer.clear();
}
}
}
/**
* Write a checkpoint record to the journal and flush it. If switchLogFiles is true,
* a new journal will be started, but only if the file is larger than
* {@link #MIN_REPLACE}. The old log is removed.
*
* @param txnId The transaction id
* @param switchLogFiles Indicates whether a new journal file should be started
* @throws JournalException
*/
public void checkpoint(final long txnId, final boolean switchLogFiles) throws JournalException {
LOG.debug("Checkpoint reached");
writeToLog(new Checkpoint(txnId));
if (switchLogFiles) {
// if we switch files, we don't need to sync.
// the file will be removed anyway.
flushBuffer();
} else {
flushToLog(true, true);
}
try {
if (switchLogFiles && channel != null && channel.position() > MIN_REPLACE) {
final Path oldFile = getFile(currentFile);
final RemoveThread rt = new RemoveThread(channel, oldFile);
try {
switchFiles();
} catch (final LogException e) {
LOG.warn("Failed to create new journal: " + e.getMessage(), e);
}
rt.start();
}
clearBackupFiles();
} catch (final IOException e) {
LOG.warn("IOException while writing checkpoint", e);
}
}
/**
* Set the file number of the last file used.
*
* @param fileNum the log file number
*/
public void setCurrentFileNum(final int fileNum) {
currentFile = fileNum;
}
public void clearBackupFiles() {
if(Files.exists(fsJournalDir)) {
try (final Stream<Path> backupFiles = Files.list(fsJournalDir)) {
backupFiles.forEach(p -> {
LOG.info("Checkpoint deleting: " + p.toAbsolutePath().toString());
if (!FileUtils.deleteQuietly(p)) {
LOG.fatal("Cannot delete file '" + p.toAbsolutePath().toString() + "' from backup journal.");
}
});
} catch (final IOException ioe) {
LOG.error("Could not clear fs.journal backup files", ioe);
}
}
}
/**
* Create a new journal with a larger file number
* than the previous file.
*
* @throws LogException
*/
public void switchFiles() throws LogException {
++currentFile;
final String fname = getFileName(currentFile);
final Path file = dir.resolve(fname);
if (Files.exists(file)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Journal file " + file.toAbsolutePath() + " already exists. Copying it.");
}
try {
final Path renamed = Files.move(file, file.resolveSibling(FileUtils.fileName(file) + BAK_FILE_SUFFIX), StandardCopyOption.ATOMIC_MOVE);
if(LOG.isDebugEnabled()) {
LOG.debug("Old file renamed from '" + file.toAbsolutePath().toString() + "' to '" + renamed.toAbsolutePath().toString() + "'");
}
} catch(final IOException ioe) {
LOG.warn(ioe); //TODO(AR) should probably be an LogException but wasn't previously!
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Creating new journal: " + file.toAbsolutePath().toString());
}
synchronized (latch) {
close();
try {
//RandomAccessFile raf = new RandomAccessFile(file, "rw");
os = new FileOutputStream(file.toFile(), true);
channel = os.getChannel();
syncThread.setChannel(channel);
} catch (final FileNotFoundException e) {
throw new LogException("Failed to open new journal: " + file.toAbsolutePath().toString(), e);
}
}
inFilePos = 0;
}
public void close() {
if (channel != null) {
try {
channel.close();
} catch (final IOException e) {
LOG.warn("Failed to close journal", e);
}
}
if (os != null) {
try {
os.close();
} catch (final IOException e) {
LOG.warn("Failed to close journal", e);
}
}
}
private static int journalFileNum(final Path path) {
final String fileName = FileUtils.fileName(path);
final int p = fileName.indexOf('.');
final String baseName = fileName.substring(0, p);
return Integer.parseInt(baseName, 16);
}
/**
* Find the journal file with the highest file number.
*
* @param files
*/
public final static int findLastFile(final Stream<Path> files) {
return files
.map(Journal::journalFileNum)
.max(Integer::max)
.orElse(-1);
}
/**
* Returns a Stream of all journal files found in the data directory.
*
* @return A Stream of all journal files. NOTE - This is
* an I/O Stream and so you are responsible for closing it!
*/
public Stream<Path> getFiles() throws IOException {
final String suffix = '.' + LOG_FILE_SUFFIX;
final String indexSuffix = "_index" + suffix;
return Files.find(dir, 1, (path, attrs) ->
attrs.isRegularFile() &&
FileUtils.fileName(path).endsWith(suffix) &&
!FileUtils.fileName(path).endsWith(indexSuffix));
}
/**
* Returns the file corresponding to the specified
* file number.
*
* @param fileNum
*/
public Path getFile(final int fileNum) {
return dir.resolve(getFileName(fileNum));
}
/**
* Shut down the journal. This will write a checkpoint record
* to the log, so recovery manager knows the file has been
* closed in a clean way.
*
* @param txnId
*/
public void shutdown(final long txnId, final boolean checkpoint) {
if (currentBuffer == null) {
return; // the db has probably shut down already
}
if (!BrokerPool.FORCE_CORRUPTION) {
if (checkpoint) {
LOG.info("Transaction journal cleanly shutting down with checkpoint...");
try {
writeToLog(new Checkpoint(txnId));
} catch (final JournalException e) {
LOG.error("An error occurred while closing the journal file: " + e.getMessage(), e);
}
}
flushBuffer();
}
fileLock.release();
syncThread.shutdown();
try {
syncThread.join();
} catch (final InterruptedException e) {
//Nothing to do
}
currentBuffer = null;
}
/**
* Called to signal that the db is currently in
* recovery phase, so no output should be written.
*
* @param value
*/
public void setInRecovery(final boolean value) {
inRecovery = value;
}
/**
* Translate a file number into a file name.
*
* @param fileNum
* @return The file name
*/
private static String getFileName(final int fileNum) {
String hex = Integer.toHexString(fileNum);
hex = "0000000000".substring(hex.length()) + hex;
return hex + '.' + LOG_FILE_SUFFIX;
}
private static class RemoveThread extends Thread {
final FileChannel channel;
final Path path;
RemoveThread(final FileChannel channel, final Path path) {
super("RemoveJournalThread");
this.channel = channel;
this.path = path;
}
@Override
public void run() {
try {
if (channel != null) {
channel.close();
}
} catch (final IOException e) {
LOG.warn("Exception while closing journal file: " + e.getMessage(), e);
}
FileUtils.deleteQuietly(path);
}
}
}