package io.eguan.dtx.journal;
/*
* #%L
* Project eguan
* %%
* Copyright (C) 2012 - 2017 Oodrive
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import static io.eguan.dtx.DtxConstants.DEFAULT_JOURNAL_FILE_PREFIX;
import static io.eguan.dtx.DtxConstants.DEFAULT_LAST_TX_VALUE;
import static io.eguan.dtx.DtxConstants.JOURNAL_FILE_EXTENSION;
import static io.eguan.dtx.DtxUtils.updateAtomicLongToAtLeast;
import static io.eguan.dtx.journal.JournalFileUtils.readLastCompleteTxId;
import io.eguan.dtx.journal.JournalRotationManager.RotationEvent;
import io.eguan.dtx.journal.JournalRotationManager.RotationListener;
import io.eguan.dtx.journal.JournalRotationManager.RotationEvent.RotationStage;
import io.eguan.proto.Common.ProtocolVersion;
import io.eguan.proto.dtx.DistTxWrapper;
import io.eguan.proto.dtx.DistTxWrapper.TxJournalEntry;
import io.eguan.proto.dtx.DistTxWrapper.TxJournalEntry.TxOpCode;
import io.eguan.proto.dtx.DistTxWrapper.TxMessage;
import io.eguan.proto.dtx.DistTxWrapper.TxNode;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.annotation.Nonnull;
import javax.annotation.ParametersAreNonnullByDefault;
import javax.annotation.concurrent.GuardedBy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.MoreObjects;
import com.google.common.base.Strings;
import com.google.common.io.Files;
/**
* Public interface to a readable and writable journal instance.
*
* @author oodrive
* @author pwehrle
*
*/
public final class WritableTxJournal implements Iterable<JournalRecord> {
/** Lower bound, in bytes, applied to the rotation threshold passed to the constructor. */
private static final long MIN_ROTATION_THRESHOLD = 4096; // 4 KiB
private static final Logger LOGGER = LoggerFactory.getLogger(WritableTxJournal.class);
/** Directory holding the journal file and its numbered backup files. */
private final File journalDirectory;
/** Read/write lock (constructed in fair mode) guarding the channel, the file lock and the iterator list. */
private final ReentrantReadWriteLock accessLock = new ReentrantReadWriteLock(true);
/** Channel used to append records to the journal file; assigned when the file is opened. */
@GuardedBy("accessLock")
private FileChannel writeChannel;
/** Read iterators handed out by {@link #iterator()}; closed and cleared on stop and on rotation. */
@GuardedBy("accessLock")
private List<RwJournalIterator> readIterators;
/** Lock obtained on the journal file through {@link #writeChannel}. */
@GuardedBy("accessLock")
private FileLock fileLock;
/** Set at the end of {@link #start()}, cleared by {@link #stop()}; volatile because it is also read without the lock. */
@GuardedBy("accessLock")
private volatile boolean started = false;
/** Set while {@link #start()} is initializing the journal; volatile because it is also read without the lock. */
@GuardedBy("accessLock")
private volatile boolean starting = false;
/** ID of the last finished transaction, initialized to the default value and updated on start, commit and rollback. */
private final AtomicLong lastFinishedTxId = new AtomicLong(DEFAULT_LAST_TX_VALUE);
/** Path of the active journal file: journal directory plus file name prefix plus extension. */
private final Path journalFile;
/** Size in bytes the journal file must exceed before a rotation is requested. */
private final long rotationThreshold;
/** Manager to which rotation requests are submitted. */
private final JournalRotationManager rotationMgr;
/**
 * Creates a journal bound to a file in the given directory.
 *
 * The file name is the given prefix followed by the journal file extension. No file system access is made here;
 * the directory is only checked when the journal is {@link #start() started}.
 *
 * @param journalDirectory
 *            the directory in which the journal file and its backups are kept
 * @param filenamePrefix
 *            the prefix of the journal file name; {@link io.eguan.dtx.DtxConstants#DEFAULT_JOURNAL_FILE_PREFIX}
 *            is used if this is <code>null</code> or empty
 * @param rotThreshold
 *            the journal file size in bytes above which rotation is requested; values below
 *            {@value #MIN_ROTATION_THRESHOLD} are raised to that minimum
 * @param rotManager
 *            the {@link JournalRotationManager} in charge of rotation
 * @throws IllegalArgumentException
 *             declared for callers; the existence and writability of the directory are verified by
 *             {@link #start()}, not here
 * @throws NullPointerException
 *             if the journal directory or the rotation manager is <code>null</code>
 */
public WritableTxJournal(@Nonnull final File journalDirectory, final String filenamePrefix,
        final long rotThreshold, final JournalRotationManager rotManager) throws IllegalArgumentException,
        NullPointerException {
    this.journalDirectory = Objects.requireNonNull(journalDirectory);
    // never rotate below the minimum size
    this.rotationThreshold = (rotThreshold < MIN_ROTATION_THRESHOLD) ? MIN_ROTATION_THRESHOLD : rotThreshold;
    this.rotationMgr = Objects.requireNonNull(rotManager);

    final String effectivePrefix;
    if (Strings.isNullOrEmpty(filenamePrefix)) {
        effectivePrefix = DEFAULT_JOURNAL_FILE_PREFIX;
    }
    else {
        effectivePrefix = filenamePrefix;
    }
    final String directoryPath = journalDirectory.getAbsolutePath();
    this.journalFile = FileSystems.getDefault().getPath(directoryPath, effectivePrefix + JOURNAL_FILE_EXTENSION);
}
/**
 * Writes a start entry to the journal.
 *
 * The entry carries the current timestamp, the transaction ID taken from the message, the participants and the
 * complete transaction message. A rotation request is submitted beforehand if the journal exceeds its size
 * threshold.
 *
 * @param txMessage
 *            the complete transaction to write
 * @param participants
 *            the set of participant {@link TxNode}s
 * @throws IOException
 *             if writing to the journal fails
 * @throws IllegalStateException
 *             if the {@link WritableTxJournal} was not {@link #start() started} or was stopped in the meantime
 * @throws NullPointerException
 *             if any argument is <code>null</code>
 */
public final void writeStart(@Nonnull final TxMessage txMessage, @Nonnull final Iterable<TxNode> participants)
        throws IOException, IllegalStateException {
    if (!started) {
        throw new IllegalStateException("Not yet started, stopped or aborted");
    }
    // must happen before taking the write lock, as the check acquires the read lock
    checkRotationCondition();

    final long txId = txMessage.getTxId();
    final TxJournalEntry txJEntry = DistTxWrapper.TxJournalEntry.newBuilder()
            .setTimestamp(System.currentTimeMillis()).setVersion(ProtocolVersion.VERSION_1).setTxId(txId)
            .setOp(TxOpCode.START).addAllTxNodes(Objects.requireNonNull(participants)).setTx(txMessage).build();
    final JournalRecord record = new JournalRecord(txJEntry.toByteArray());
    final ByteBuffer buffer = ByteBuffer.wrap(record.getContent());

    accessLock.writeLock().lock();
    try {
        // re-check under the lock: a concurrent stop() must not be undone by re-opening the file below
        if (!started) {
            throw new IllegalStateException("Not yet started, stopped or aborted");
        }
        if (!writeChannel.isOpen()) {
            openAndLockJournalFile();
        }
        assert writeChannel.isOpen();
        // a single write() call is not guaranteed to consume the whole buffer
        while (buffer.hasRemaining()) {
            writeChannel.write(buffer);
        }
    }
    finally {
        accessLock.writeLock().unlock();
    }
}
/**
 * Writes a commit entry to the journal and records the transaction as the last finished one.
 *
 * A rotation request is submitted beforehand if the journal exceeds its size threshold.
 *
 * @param txId
 *            the transaction ID to include in the entry
 * @param participants
 *            the set of participant {@link TxNode}s
 * @throws IOException
 *             if writing the entry fails
 * @throws IllegalStateException
 *             if the {@link WritableTxJournal} was not {@link #start() started} or was stopped in the meantime
 * @throws NullPointerException
 *             if the participants are <code>null</code>
 */
public final void writeCommit(final long txId, @Nonnull final Iterable<TxNode> participants) throws IOException,
        IllegalStateException {
    if (!started) {
        throw new IllegalStateException("Not yet started, stopped or aborted");
    }
    // must happen before taking the write lock, as the check acquires the read lock
    checkRotationCondition();

    final TxJournalEntry txJEntry = DistTxWrapper.TxJournalEntry.newBuilder()
            .setTimestamp(System.currentTimeMillis()).setVersion(ProtocolVersion.VERSION_1).setTxId(txId)
            .setOp(TxOpCode.COMMIT).addAllTxNodes(Objects.requireNonNull(participants)).build();
    final JournalRecord record = new JournalRecord(txJEntry.toByteArray());
    final ByteBuffer buffer = ByteBuffer.wrap(record.getContent());

    accessLock.writeLock().lock();
    try {
        // re-check under the lock: a concurrent stop() must not be undone by re-opening the file below
        if (!started) {
            throw new IllegalStateException("Not yet started, stopped or aborted");
        }
        if (!writeChannel.isOpen()) {
            openAndLockJournalFile();
        }
        assert writeChannel.isOpen();
        // a single write() call is not guaranteed to consume the whole buffer
        while (buffer.hasRemaining()) {
            writeChannel.write(buffer);
        }
        // only mark the transaction finished once the entry is completely written
        lastFinishedTxId.set(txId);
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("Committed transaction; txId=" + txId + ", journal=" + journalFile);
        }
    }
    finally {
        accessLock.writeLock().unlock();
    }
}
/**
 * Writes a rollback entry to the journal and records the transaction as the last finished one.
 *
 * A rotation request is submitted beforehand if the journal exceeds its size threshold.
 *
 * @param txId
 *            the transaction ID to include in the entry
 * @param errCode
 *            an error code describing the cause of the rollback
 * @param participants
 *            the set of participant {@link TxNode}s
 * @throws IOException
 *             if writing the entry fails
 * @throws IllegalStateException
 *             if the {@link WritableTxJournal} was not {@link #start() started} or was stopped in the meantime
 * @throws NullPointerException
 *             if the participants are <code>null</code>
 */
public final void writeRollback(final long txId, final int errCode, @Nonnull final Iterable<TxNode> participants)
        throws IOException, IllegalStateException {
    if (!started) {
        throw new IllegalStateException("Not yet started, stopped or aborted");
    }
    // must happen before taking the write lock, as the check acquires the read lock
    checkRotationCondition();

    // explicit null check, consistent with writeStart() and writeCommit()
    final TxJournalEntry txJEntry = DistTxWrapper.TxJournalEntry.newBuilder()
            .setTimestamp(System.currentTimeMillis()).setVersion(ProtocolVersion.VERSION_1).setTxId(txId)
            .setOp(TxOpCode.ROLLBACK).addAllTxNodes(Objects.requireNonNull(participants)).setErrCode(errCode)
            .build();
    final JournalRecord record = new JournalRecord(txJEntry.toByteArray());
    final ByteBuffer buffer = ByteBuffer.wrap(record.getContent());

    accessLock.writeLock().lock();
    try {
        // re-check under the lock: a concurrent stop() must not be undone by re-opening the file below
        if (!started) {
            throw new IllegalStateException("Not yet started, stopped or aborted");
        }
        if (!writeChannel.isOpen()) {
            openAndLockJournalFile();
        }
        assert writeChannel.isOpen();
        // a single write() call is not guaranteed to consume the whole buffer
        while (buffer.hasRemaining()) {
            writeChannel.write(buffer);
        }
        // only mark the transaction finished once the entry is completely written
        lastFinishedTxId.set(txId);
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("Rolled back transaction; txId=" + txId + ", journal=" + journalFile);
        }
    }
    finally {
        accessLock.writeLock().unlock();
    }
}
/**
 * Creates a read-only view on the file backing this journal.
 *
 * The view is built from the journal file and the rotation manager of this instance. It may be requested both
 * once the journal is started and while it is in the process of starting.
 *
 * @return a new {@link ReadOnlyTxJournal} instance
 * @throws IllegalStateException
 *             if this instance is neither {@link #start() started} nor starting
 */
public final ReadOnlyTxJournal newReadOnlyTxJournal() throws IllegalStateException {
    final boolean usable = started || starting;
    if (usable) {
        return new ReadOnlyTxJournal(journalFile.toFile(), rotationMgr);
    }
    throw new IllegalStateException("Not started and not starting");
}
/**
 * Starts the journal into a state ready to receive input/output operations.
 *
 * Opens and locks the journal file, creates the list tracking read iterators and initializes the ID of the last
 * finished transaction from the journal content. Calling this on a started journal has no effect. If starting
 * fails, the journal is left stopped with its file closed, so that {@link #start()} may be attempted again.
 *
 * @throws IOException
 *             if opening the journal file fails
 * @throws IllegalStateException
 *             if the target directory does not exist or is not writable
 */
public final void start() throws IOException, IllegalStateException {
    accessLock.writeLock().lock();
    try {
        if (started || starting) {
            return;
        }
        // read-only views may be created while starting, see newReadOnlyTxJournal()
        starting = true;
        try {
            if (!this.journalDirectory.exists() || !this.journalDirectory.canWrite()) {
                throw new IllegalStateException("Journal directory does not exist or is not writable");
            }
            openAndLockJournalFile();
            readIterators = Collections.synchronizedList(new ArrayList<RwJournalIterator>());
            updateLastTxCounters();
            started = true;
        }
        finally {
            // always leave the transitional state, otherwise a failed start would turn every later call
            // into a silent no-op
            starting = false;
            if (!started && writeChannel != null && writeChannel.isOpen()) {
                // do not keep the file open after a failed start; closing the channel also releases the
                // file lock acquired through it
                try {
                    writeChannel.close();
                }
                catch (final IOException e) {
                    // logged only, so the exception that made the start fail is the one propagated
                    LOGGER.warn("Error closing journal file after failed start; journalFile=" + journalFile, e);
                }
            }
        }
    }
    finally {
        accessLock.writeLock().unlock();
    }
}
/**
 * Tells whether this journal is currently started.
 *
 * The state is read without acquiring the access lock.
 *
 * @return <code>true</code> once {@link #start()} has completed and until {@link #stop()} is executed,
 *         <code>false</code> otherwise
 */
public final boolean isStarted() {
    final boolean currentState = started;
    return currentState;
}
/**
 * Stops the instance.
 *
 * Releases the file lock, closes the journal file and closes all read iterators. Calling this on a journal that
 * is not started has no effect. The journal ends up stopped even if closing the journal file fails.
 *
 * @throws IOException
 *             if closing the journal file fails
 */
public final void stop() throws IOException {
    accessLock.writeLock().lock();
    try {
        if (!started) {
            return;
        }
        try {
            closeAndReleaseJournalFile();
        }
        finally {
            // done even if closing the write channel failed: read channels must not stay open and the
            // journal must not keep reporting itself as started
            closeReadIterators();
            started = false;
        }
    }
    finally {
        accessLock.writeLock().unlock();
    }
}
/**
 * Gets the ID of the last finished transaction.
 *
 * @return the current value of the counter, which is
 *         {@link io.eguan.dtx.DtxConstants#DEFAULT_LAST_TX_VALUE} as long as no finished transaction was found
 *         or written
 * @throws IllegalStateException
 *             if the journal is not {@link #start() started}
 */
public final long getLastFinishedTxId() throws IllegalStateException {
    if (started) {
        return lastFinishedTxId.get();
    }
    throw new IllegalStateException("Not started");
}
/**
 * Gets the name of the journal file as a string.
 *
 * The path is built from the absolute path of the journal directory and the journal file name.
 *
 * @return the string form of the journal file path
 */
public final String getJournalFilename() {
    final String absoluteName = journalFile.toString();
    return absoluteName;
}
/**
 * Releases the lock on the journal file and closes the write channel.
 *
 * A failure to release the file lock is logged and otherwise ignored; the write channel is closed in any case.
 * The caller must hold the write lock of {@link #accessLock}.
 *
 * @throws IOException
 *             if closing the write channel fails
 */
private final void closeAndReleaseJournalFile() throws IOException {
    assert accessLock.writeLock().isHeldByCurrentThread();
    try {
        final boolean lockStillHeld = fileLock.isValid();
        if (lockStillHeld) {
            fileLock.release();
        }
    }
    catch (final IOException releaseFailure) {
        LOGGER.warn("Error releasing lock on journal file; journalFile=" + journalFile, releaseFailure);
    }
    finally {
        // the channel is closed whatever happened to the file lock
        final boolean channelOpen = writeChannel.isOpen();
        if (channelOpen) {
            writeChannel.close();
        }
    }
}
/**
 * Closes all read iterators and clears the {@link #readIterators} list.
 *
 * Exceptions thrown by {@link AutoCloseable#close()} are logged at debug level together with their cause and do
 * not prevent the remaining iterators from being closed. Needs external locking.
 */
private final void closeReadIterators() {
    // iterating a synchronized list requires holding its monitor
    synchronized (readIterators) {
        for (final RwJournalIterator currReadIterator : readIterators) {
            try {
                currReadIterator.close();
            }
            catch (final Exception e) {
                // best effort: keep closing the others, but do not lose the cause
                if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("Failed to close read channel; file=" + journalFile.toString(), e);
                }
            }
        }
        readIterators.clear();
    }
}
/**
* Does the rotation of journal backup files and of the journal file itself.
*
* The rotation runs in two phases. Under the read lock, the rotation condition is re-checked, the listeners
* receive a {@link RotationStage#PRE_ROTATE} event and the existing backup files are shifted. Under the write
* lock, the journal file is closed, all read iterators are closed, the journal file is moved to the backup with
* suffix <code>.1</code> and the journal file is opened again. Finally the listeners receive a
* {@link RotationStage#ROTATE_SUCCESS} or {@link RotationStage#ROTATE_FAILURE} event.
*
* If the journal does not need rotation, the method returns without notifying any listener.
*
* Note: This closes all active read iterators pending a consistent way to iterate through rotations.
*
* {@link IOException}s are handled internally to avoid crashing the calling threads, as are exceptions thrown by
* listeners, which are logged and ignored.
*
* NOTE(review): earlier documentation mentioned an emergency {@link #stop()} when no operational state can be
* re-established; this method does not call {@link #stop()} itself, so presumably that is left to the listeners
* receiving the failure event - confirm.
*
* @param listeners
* optional {@link RotationListener}s to which to dispatch {@link RotationEvent}s
* @throws InterruptedException
* if the thread is interrupted during processing; NOTE(review): no statement visible in this method
* appears to throw it - confirm whether the declaration is still needed
*
*/
final void executeRotation(final RotationListener... listeners) throws InterruptedException {
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Rotating journal file; file=" + journalFile + ", listeners: " + Arrays.asList(listeners));
}
final String journalFilename = journalFile.toString();
// phase 1: under the read lock, check the condition, notify listeners and shift the existing backups
accessLock.readLock().lock();
try {
// the condition may no longer hold by the time the submitted rotation is executed
if (!needsRotation()) {
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Journal does not need rotation; file=" + journalFile);
}
// no event is sent to the listeners in this case
return;
}
final RotationEvent preRotEvt = new RotationEvent(journalFilename, RotationEvent.RotationStage.PRE_ROTATE);
for (final RotationListener currListener : listeners) {
try {
currListener.rotationEventOccured(preRotEvt);
}
catch (final Throwable t) {
// a failing listener must not prevent the rotation
LOGGER.warn("Exception on notification listener", t);
}
}
shiftExistingBackupFiles(journalDirectory, journalFile);
}
finally {
accessLock.readLock().unlock();
}
// NOTE(review): the lock is fully released between the two phases, so the state checked above may have
// changed (e.g. by a concurrent stop()) before the write lock is obtained - confirm this is acceptable
boolean rotationSuccess = false;
// phase 2: lock and move the journal file itself
accessLock.writeLock().lock();
try {
try {
closeAndReleaseJournalFile();
}
catch (final IOException e) {
// if the write channel is still open, abort gracefully; the failure event is still sent by the
// outer finally block
if (writeChannel.isOpen()) {
return;
}
}
closeReadIterators();
final File firstBackupFile = new File(journalDirectory, journalFile.getFileName() + ".1");
try {
if (firstBackupFile.exists()) {
// the first backup slot was not freed by the shifting above, never overwrite it
LOGGER.warn("Could not rotate journal file, backup exists; backup=" + firstBackupFile);
return;
}
else {
Files.move(this.journalFile.toFile(), firstBackupFile);
}
}
catch (final IOException e) {
// moving failed, continue hoping the channel can be re-opened
LOGGER.warn("Backing up journal failed", e);
}
finally {
// tries to put journal back into operation; this also runs after the early return above
openAndLockJournalFile();
// NOTE(review): reached whenever re-opening succeeded, including when the backup already existed
// or the move failed, so the success flag reflects the re-opening rather than the move - confirm
// this is the intended meaning of ROTATE_SUCCESS
rotationSuccess = true;
}
}
catch (final IOException e) {
// re-opening the write channel failed
LOGGER.error("Could not re-open journal file channel", e);
}
finally {
// the lock is released before notifying, so listeners are called without holding it
accessLock.writeLock().unlock();
final RotationEvent endRotEvt = new RotationEvent(journalFilename,
rotationSuccess ? RotationStage.ROTATE_SUCCESS : RotationStage.ROTATE_FAILURE);
for (final RotationListener currListener : listeners) {
try {
currListener.rotationEventOccured(endRotEvt);
}
catch (final Throwable t) {
LOGGER.warn("Exception on notification listener", t);
}
}
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Rotation finished; file=" + journalFile + ", result=" + endRotEvt.getStage());
}
}
}
/**
 * Shifts all backup files of the form <code>&lt;journalFilename&gt;.&lt;backupNumber&gt;</code> to the next
 * backup number. This needs an external read lock.
 *
 * The backup files are taken in the order provided by {@link JournalFileUtils#getInverseBackupMap}, presumably
 * from the highest to the lowest number so that each target slot is free when its turn comes. A file is only
 * moved if it exists and its target does not; failures are logged and do not stop the processing of the
 * remaining files.
 *
 * @param journalDirectory
 *            the directory containing the backup files
 * @param journalFile
 *            the path of the journal file whose backups are to be shifted
 */
private static void shiftExistingBackupFiles(final File journalDirectory, final Path journalFile) {
    final String journalFilename = journalFile.getFileName().toString();
    final Map<Integer, File> backupsByRank = JournalFileUtils.getInverseBackupMap(journalDirectory,
            journalFilename);

    for (final Map.Entry<Integer, File> backup : backupsByRank.entrySet()) {
        final int rank = backup.getKey().intValue();
        final File source = backup.getValue();
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("Backup to rotate found; rank=" + backup.getKey() + ", file=" + source);
        }

        // the backup moves up by one rank
        final File destination = new File(journalDirectory, journalFilename + "." + (rank + 1));
        final boolean movable = source.exists() && !destination.exists();
        if (!movable) {
            LOGGER.warn("Not rotating backup file, either source doesn't exist or destination does; source="
                    + source + ", destination=" + destination);
            continue;
        }
        try {
            Files.move(source, destination);
        }
        catch (final IOException moveFailure) {
            // logged only, the other backups are still processed
            LOGGER.warn("Failed to move backup file; src=" + source + ", destination=" + destination,
                    moveFailure);
        }
    }
}
/**
 * Submits a rotation request for this journal to the local {@link #rotationMgr} if {@link #needsRotation()}
 * reports that rotation is due.
 *
 * This must not be called while holding a write lock on {@link #accessLock}.
 */
private final void checkRotationCondition() {
    if (needsRotation()) {
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Submitting rotation; file=" + journalFile);
        }
        this.rotationMgr.submitRotation(this);
    }
}
/**
 * Checks if the rotation size condition is fulfilled.
 *
 * This method acquires a shared lock on {@link #accessLock} and therefore must not be called while holding a write
 * lock.
 *
 * If the calling thread is interrupted while waiting for the lock, the check is abandoned and the interrupt
 * status of the thread is restored so that the caller can still react to the interruption.
 *
 * @return <code>true</code> if the journal is started, its file is open and the file's size could be determined
 *         and is over the threshold, <code>false</code> otherwise
 */
private final boolean needsRotation() {
    try {
        accessLock.readLock().lockInterruptibly();
    }
    catch (final InterruptedException e1) {
        LOGGER.warn("Interrupted while checking rotation condition");
        // do not swallow the interruption, whoever interrupted this thread expects it to be noticed
        Thread.currentThread().interrupt();
        return false;
    }
    try {
        final FileChannel channel = writeChannel;
        // the channel is null as long as the journal file was never opened
        if (!started || channel == null || !channel.isOpen()) {
            return false;
        }
        return channel.size() > this.rotationThreshold;
    }
    catch (final IOException e) {
        LOGGER.warn("Could not determine journal file size", e);
        return false;
    }
    finally {
        accessLock.readLock().unlock();
    }
}
/**
 * Opens the journal file for synchronized appending, creating it if necessary, and locks it.
 *
 * {@link #writeChannel} and {@link #fileLock} are only replaced once both steps succeeded. If the file cannot be
 * locked, the freshly opened channel is closed again, so that no open but unlocked channel is left behind.
 *
 * @throws IOException
 *             if opening or locking the file fails
 */
private final void openAndLockJournalFile() throws IOException {
    final FileChannel channel = FileChannel.open(journalFile, StandardOpenOption.CREATE,
            StandardOpenOption.APPEND, StandardOpenOption.DSYNC);
    final FileLock lock;
    try {
        lock = channel.lock();
    }
    catch (final IOException | RuntimeException e) {
        // locking may also fail with an unchecked exception, e.g. if the file is already locked by this JVM
        try {
            channel.close();
        }
        catch (final IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
    writeChannel = channel;
    this.fileLock = lock;
}
/**
 * Raises {@link #lastFinishedTxId} to the ID of the last complete transaction found in the journal.
 *
 * The ID is first read through this instance; if the result is still the default value, it is read again
 * through a new read-only view of the journal. The counter is never lowered by this method.
 */
private final void updateLastTxCounters() {
    final long knownId = this.lastFinishedTxId.get();
    long candidateId = Math.max(knownId, readLastCompleteTxId(this));
    final boolean nothingFound = (candidateId == DEFAULT_LAST_TX_VALUE);
    if (nothingFound) {
        // second attempt through a read-only view
        final ReadOnlyTxJournal readOnlyView = newReadOnlyTxJournal();
        candidateId = readLastCompleteTxId(readOnlyView);
    }
    updateAtomicLongToAtLeast(lastFinishedTxId, candidateId);
}
/**
* Weakly consistent iterator for a read/write journal.
*
*
*/
private final class RwJournalIterator implements Iterator<JournalRecord>, AutoCloseable {
private JournalRecord currentRecord;
private final Lock readLock;
private final FileChannel journal;
/**
* Constructs a read-only iterator for a transaction journal file.
*
* @param readLock
* the shared lock on guarding the file to acquire before reading from it
* @param journal
* the target journal file
* @throws IOException
* if initializing the read access to the file fails
*/
@ParametersAreNonnullByDefault
RwJournalIterator(final Lock readLock, final Path journal) throws IOException {
this.readLock = Objects.requireNonNull(readLock);
this.journal = FileChannel.open(journal, StandardOpenOption.READ);
readNextRecord();
}
@Override
public final boolean hasNext() {
return (currentRecord != null);
}
@Override
public final JournalRecord next() {
if (currentRecord == null) {
throw new NoSuchElementException();
}
final JournalRecord result = currentRecord;
readNextRecord();
return result;
}
@Override
public final void remove() {
throw new UnsupportedOperationException();
}
private final void readNextRecord() {
readLock.lock();
try {
if (!journal.isOpen()) {
terminateIteration();
return;
}
currentRecord = JournalRecord.readRecordFromByteChannel(journal);
}
catch (final IOException e) {
if (LOGGER.isDebugEnabled()) {
LOGGER.debug("Could not read from journal", e);
}
// skipping or reading failed once, so we consider any further read attempts futile
terminateIteration();
}
finally {
readLock.unlock();
}
}
private final void terminateIteration() {
currentRecord = null;
}
@Override
public final void close() throws Exception {
readLock.lock();
try {
journal.close();
}
finally {
readLock.unlock();
}
}
}
/**
* Noop iterator in case the journal is stopped or the read channel cannot be set up.
*
*
*/
private final class UnresponsiveIterator implements Iterator<JournalRecord> {
@Override
public final boolean hasNext() {
return false;
}
@Override
public final JournalRecord next() {
throw new NoSuchElementException();
}
@Override
public final void remove() {
throw new UnsupportedOperationException();
}
}
/**
 * Creates an iterator over the records of the journal file.
 *
 * If the journal is started, a new iterator with its own read channel is opened and registered in the list of
 * read iterators. If the journal is not started or the read channel cannot be opened, an iterator without any
 * element is returned instead.
 *
 * @return an {@link Iterator} over the {@link JournalRecord}s, possibly without elements
 */
@Override
public final Iterator<JournalRecord> iterator() {
    // the read lock is held while creating the iterator and adding it to the synchronized list
    final Lock sharedLock = accessLock.readLock();
    sharedLock.lock();
    try {
        if (started) {
            final RwJournalIterator liveIterator = new RwJournalIterator(sharedLock, journalFile);
            readIterators.add(liveIterator);
            return liveIterator;
        }
        return new UnresponsiveIterator();
    }
    catch (final IOException openFailure) {
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Failed to open read channel on journal; file=" + journalFile.toString(), openFailure);
        }
        return new UnresponsiveIterator();
    }
    finally {
        sharedLock.unlock();
    }
}
/**
 * Builds a description of this journal made of its file, its started state and its rotation threshold.
 *
 * @return the description as a {@link String}
 */
@Override
public final String toString() {
    final MoreObjects.ToStringHelper description = MoreObjects.toStringHelper(WritableTxJournal.class);
    description.add("journalFile", journalFile);
    description.add("started", started);
    description.add("rotationThreshold", rotationThreshold);
    return description.toString();
}
}