/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.master.journal.ufs;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.InvalidJournalEntryException;
import alluxio.master.journal.JournalReader;
import alluxio.master.journal.options.JournalReaderOptions;
import alluxio.proto.journal.Journal;
import alluxio.underfs.UnderFileSystem;
import alluxio.util.proto.ProtoUtils;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayDeque;
import java.util.Queue;
import javax.annotation.concurrent.NotThreadSafe;
/**
* Implementation of {@link JournalReader} that reads journal entries from a UFS. It can optionally
* read after a given sequence number. By default, it starts from sequence number 0.
* If this reader runs in a primary master, it reads the incomplete log.
* If this reader runs in a secondary master, it does not read the incomplete log.
*/
@NotThreadSafe
final class UfsJournalReader implements JournalReader {
  private static final Logger LOG = LoggerFactory.getLogger(UfsJournalReader.class);

  private final UfsJournal mJournal;
  private final UnderFileSystem mUfs;
  /** Whether the reader runs in a primary master. */
  private final boolean mPrimary;
  /**
   * The next edit log sequence number to read. This is not incremented when reading from
   * the checkpoint.
   */
  private long mNextSequenceNumber;
  /** The input stream to read the journal entries. */
  private JournalInputStream mInputStream;
  /** A queue of files to be processed including checkpoint and logs. */
  private final Queue<UfsJournalFile> mFilesToProcess;
  /** Buffer used to read from the file. Entries larger than this get a one-off buffer. */
  private final byte[] mBuffer = new byte[1024];
  /** Whether the reader is closed. */
  private boolean mClosed;

  /**
   * A simple wrapper that wraps the journal file and the input stream.
   *
   * This is intentionally a non-static inner class: it opens the stream through the enclosing
   * reader's UFS handle and compares against the enclosing reader's next sequence number.
   */
  private class JournalInputStream implements Closeable {
    /** The journal file being read. */
    final UfsJournalFile mFile;
    /** The input stream that reads from a file. */
    final InputStream mStream;

    /**
     * Opens the given journal file for reading.
     *
     * @param file the journal file to open
     * @throws IOException if the file cannot be opened in the UFS
     */
    JournalInputStream(UfsJournalFile file) throws IOException {
      mFile = file;
      LOG.info("Reading journal file {}.", file.getLocation());
      mStream = mUfs.open(file.getLocation().toString());
    }

    /**
     * @return whether we have finished reading the current file, i.e. the reader's next sequence
     *         number has reached the end sequence number of this file
     */
    boolean isDone() {
      return mFile.getEnd() == mNextSequenceNumber;
    }

    @Override
    public void close() throws IOException {
      mStream.close();
    }
  }

  /**
   * Creates a new instance of {@link UfsJournalReader}.
   *
   * @param journal the handle to the journal
   * @param options the options supplying the sequence number to start reading from and whether
   *        the reader runs in a primary master
   */
  UfsJournalReader(UfsJournal journal, JournalReaderOptions options) {
    mFilesToProcess = new ArrayDeque<>();
    mJournal = Preconditions.checkNotNull(journal, "journal");
    // Fail with a descriptive message instead of an anonymous NPE on the first dereference.
    Preconditions.checkNotNull(options, "options");
    mUfs = mJournal.getUfs();
    mNextSequenceNumber = options.getNextSequenceNumber();
    mPrimary = options.isPrimary();
  }

  /**
   * Closes the currently open journal input stream, if any. Calling this more than once is a
   * no-op.
   *
   * @throws IOException if closing the underlying stream fails
   */
  @Override
  public void close() throws IOException {
    if (mClosed) {
      return;
    }
    mClosed = true;
    if (mInputStream != null) {
      mInputStream.close();
    }
  }

  @Override
  public long getNextSequenceNumber() {
    return mNextSequenceNumber;
  }

  /**
   * Reads the next journal entry. Checkpoint entries are returned as-is; edit log entries are
   * returned only when their sequence number matches the expected one, and entries with a smaller
   * sequence number are skipped as duplicates.
   *
   * @return the next journal entry, or null if there is nothing more to read
   * @throws IOException if reading from the UFS fails
   * @throws InvalidJournalEntryException if an entry is missing or a completed log is truncated
   */
  @Override
  public Journal.JournalEntry read() throws IOException, InvalidJournalEntryException {
    while (true) {
      Journal.JournalEntry entry = readInternal();
      if (entry == null) {
        return null;
      }
      // readInternal() only returns a non-null entry while mInputStream is set.
      if (mInputStream.mFile.isCheckpoint()) {
        // Checkpoint entries do not advance the edit log sequence number.
        return entry;
      }
      long sequenceNumber = entry.getSequenceNumber();
      if (sequenceNumber == mNextSequenceNumber) {
        mNextSequenceNumber++;
        return entry;
      }
      if (sequenceNumber > mNextSequenceNumber) {
        throw new InvalidJournalEntryException(ExceptionMessage.JOURNAL_ENTRY_MISSING,
            mNextSequenceNumber, entry.getSequenceNumber());
      }
      // sequenceNumber < mNextSequenceNumber. This can happen in the following two scenarios:
      // 1. The primary master failed when renaming the current log to completed log which might
      //    result in duplicate logs.
      // 2. The first completed log after the checkpoint's last sequence number might contain
      //    some entries that duplicate the checkpoint.
      LOG.debug("Skipping duplicate log entry {} (next sequence number: {}).", entry,
          mNextSequenceNumber);
    }
  }

  /**
   * The real read implementation that reads a journal entry from a journal file.
   *
   * @return the journal entry, null if no journal entry is found
   * @throws IOException if reading from the UFS fails or the entry's size prefix is invalid
   * @throws InvalidJournalEntryException if the journal entry found is invalid
   */
  private Journal.JournalEntry readInternal() throws IOException, InvalidJournalEntryException {
    updateInputStream();
    if (mInputStream == null) {
      return null;
    }

    int firstByte = mInputStream.mStream.read();
    if (firstByte == -1) {
      // If this is the checkpoint file, we need to reset the sequence number to update the stream
      // because the sequence number in the checkpoint entries is not in the same space as the
      // sequence number in the edit logs.
      if (mInputStream.mFile.isCheckpoint()) {
        mNextSequenceNumber = mInputStream.mFile.getEnd();
        return readInternal();
      }
      // updateInputStream() would have moved past a completed log that was fully consumed, so
      // hitting EOF here on anything other than the incomplete log means entries are missing.
      if (!mInputStream.mFile.isIncompleteLog()) {
        throw new InvalidJournalEntryException(
            ExceptionMessage.JOURNAL_ENTRY_TRUNCATED_UNEXPECTEDLY, mNextSequenceNumber);
      }
      return null;
    }

    // All journal entries start with their size in bytes written as a varint.
    int size;
    try {
      size = ProtoUtils.readRawVarint32(firstByte, mInputStream.mStream);
    } catch (IOException e) {
      LOG.warn("Journal entry was truncated in the size portion.");
      if (mInputStream.mFile.isIncompleteLog() && ProtoUtils.isTruncatedMessageException(e)) {
        return null;
      }
      throw e;
    }
    if (size < 0) {
      // A negative length prefix can only come from corrupt data. Without this check the read
      // loop below is skipped and an empty stream is parsed into a bogus default entry.
      throw new IOException("Journal entry has invalid negative size " + size
          + " (next sequence number: " + mNextSequenceNumber + ")");
    }

    // Reuse the shared buffer when the entry fits; otherwise allocate one for this entry only.
    byte[] buffer = size <= mBuffer.length ? mBuffer : new byte[size];
    // Total bytes read so far for journal entry.
    int totalBytesRead = 0;
    while (totalBytesRead < size) {
      // Bytes read in last read request.
      int latestBytesRead =
          mInputStream.mStream.read(buffer, totalBytesRead, size - totalBytesRead);
      if (latestBytesRead < 0) {
        break;
      }
      totalBytesRead += latestBytesRead;
    }
    if (totalBytesRead < size) {
      LOG.warn("Journal entry was truncated. Expected to read {} bytes but only got {}", size,
          totalBytesRead);
      if (!mInputStream.mFile.isIncompleteLog()) {
        throw new InvalidJournalEntryException(
            ExceptionMessage.JOURNAL_ENTRY_TRUNCATED_UNEXPECTEDLY, mNextSequenceNumber);
      }
      return null;
    }

    return Journal.JournalEntry.parseFrom(new ByteArrayInputStream(buffer, 0, size));
  }

  /**
   * Updates the journal input stream by closing the current journal input stream if it is done and
   * opening a new one. When the queue of pending files is empty, it is refilled from a fresh
   * journal snapshot. If there is nothing to read, {@code mInputStream} is left as null.
   *
   * @throws IOException if the snapshot cannot be taken or a journal file cannot be opened/closed
   */
  private void updateInputStream() throws IOException {
    // Keep the current stream if it is the incomplete log (it is never considered done) or if it
    // still has entries to read.
    if (mInputStream != null && (mInputStream.mFile.isIncompleteLog() || !mInputStream.isDone())) {
      return;
    }

    if (mInputStream != null) {
      mInputStream.close();
      mInputStream = null;
    }

    if (mFilesToProcess.isEmpty()) {
      UfsJournalSnapshot snapshot = UfsJournalSnapshot.getSnapshot(mJournal);
      if (snapshot.getCheckpoints().isEmpty() && snapshot.getLogs().isEmpty()) {
        return;
      }

      int index = 0;
      if (!snapshot.getCheckpoints().isEmpty()) {
        UfsJournalFile checkpoint = snapshot.getLatestCheckpoint();
        if (mNextSequenceNumber < checkpoint.getEnd()) {
          mFilesToProcess.add(checkpoint);
          // Reset the sequence number to 0 because it is not supported to read from checkpoint with
          // an offset. This can only happen in the following scenario:
          // 1. Read checkpoint to SN1, then optionally read completed logs to SN2 (>= SN1).
          // 2. A new checkpoint is written to SN3 (> SN2).
          // 3. Resume reading from SN2.
          mNextSequenceNumber = 0;
        }
        for (; index < snapshot.getLogs().size(); index++) {
          UfsJournalFile file = snapshot.getLogs().get(index);
          if (file.getEnd() > checkpoint.getEnd()) {
            break;
          }
        }
        // index now points to the first log with mEnd > checkpoint.mEnd.
      }
      for (; index < snapshot.getLogs().size(); index++) {
        UfsJournalFile file = snapshot.getLogs().get(index);
        // Skip the incomplete log unless running in a primary master, and skip logs that end at
        // or before the next sequence number to read.
        if ((!mPrimary && file.isIncompleteLog()) || mNextSequenceNumber >= file.getEnd()) {
          continue;
        }
        mFilesToProcess.add(file);
      }
    }

    // Only dequeue the file once its stream has been opened successfully. If opening throws, the
    // file stays at the head of the queue instead of being silently dropped, so a later call does
    // not skip it.
    UfsJournalFile next = mFilesToProcess.peek();
    if (next != null) {
      mInputStream = new JournalInputStream(next);
      mFilesToProcess.remove();
    }
  }
}