/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode.bookkeeper;
import org.apache.bookkeeper.client.BKException;
import org.apache.bookkeeper.client.LedgerEntry;
import org.apache.bookkeeper.client.LedgerHandle;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
import static org.apache.hadoop.hdfs.server.namenode.bookkeeper.BookKeeperJournalManager.bkException;
import static org.apache.hadoop.hdfs.server.namenode.bookkeeper.zk.ZkUtil.interruptedException;
/**
 * An {@link InputStream} over a BookKeeper ledger which maps to a specific
 * edit log segment.
 *
 * <p>Bytes are served by reading ledger entries one at a time, starting at a
 * caller-supplied first entry id. The stream keeps a "current" state and an
 * optional "saved" state (see {@link #savePosition(long)} and
 * {@link #position(long)}) so that a reader can rewind to a previously
 * recorded position.
 *
 * <p>Note that {@link #read(byte[], int, int)} reports the current end of the
 * ledger by returning fewer bytes than requested (possibly {@code 0}) rather
 * than {@code -1}.
 */
public class BookKeeperJournalInputStream extends InputStream {

  private static final Log LOG =
      LogFactory.getLog(BookKeeperJournalInputStream.class);

  // BookKeeper ledger is mutable as the ledger may need to be re-opened
  // by the caller in order to find the true end for tailing
  private LedgerHandle ledger;

  // This is not txId, this is the id of the first ledger entry
  private final long firstLedgerEntryId;

  // Maximum ledger entry id seen so far. In an in-progress edit log
  // stream this changes over time
  private long maxLedgerEntryIdSeen;

  // Stream over the ledger entry currently being consumed; null when no
  // entry is open (initially, after a rewind, or at the end of the ledger)
  private InputStream entryStream;

  // Keep track of the current state (see the InputStreamState inner
  // class for more detailed information) and the "last known good" state
  // (updated by calling savePosition()).
  private InputStreamState currentStreamState;
  private InputStreamState savedStreamState;

  /**
   * Mutable bookkeeping for a position within the ledger: byte offsets, the
   * id of the next entry to fetch, and the external reader position that the
   * state was associated with when it was saved.
   */
  static class InputStreamState {

    private long offsetInLedger; // How many bytes have we read from this ledger
    private long readerPosition; // How many bytes has the reader read
    private long nextLedgerEntryId; // Next ledger entry id to read
    private int offsetInEntry; // Bytes read from the current ledger entry

    InputStreamState() {
      offsetInLedger = 0;
      offsetInEntry = 0;
    }

    /**
     * Create a copy of another state object. Used to save the current state.
     *
     * @param state The state to copy
     * @return A new, independent state holding the same four values
     */
    static InputStreamState copyOf(InputStreamState state) {
      InputStreamState copyState = new InputStreamState();
      copyState.setNextLedgerEntryId(state.getNextLedgerEntryId());
      copyState.setOffsetInEntry(state.getOffsetInEntry());
      copyState.setOffsetInLedger(state.getOffsetInLedger());
      copyState.setReaderPosition(state.getReaderPosition());
      return copyState;
    }

    long getOffsetInLedger() {
      return offsetInLedger;
    }

    void setOffsetInLedger(long offsetInLedger) {
      this.offsetInLedger = offsetInLedger;
    }

    void advanceOffsetInLedger(long numBytes) {
      offsetInLedger += numBytes;
    }

    long getReaderPosition() {
      return readerPosition;
    }

    void setReaderPosition(long readerPosition) {
      this.readerPosition = readerPosition;
    }

    long getNextLedgerEntryId() {
      return nextLedgerEntryId;
    }

    void incrementNextLedgerEntryId() {
      this.nextLedgerEntryId++;
    }

    void setNextLedgerEntryId(long nextLedgerEntryId) {
      this.nextLedgerEntryId = nextLedgerEntryId;
    }

    int getOffsetInEntry() {
      return offsetInEntry;
    }

    void setOffsetInEntry(int offsetInEntry) {
      this.offsetInEntry = offsetInEntry;
    }

    void advanceOffsetInEntry(long numBytes) {
      // The compound assignment implicitly narrows the long sum back to int
      offsetInEntry += numBytes;
    }
  }

  /**
   * Create an input stream object for a specified BookKeeper ledger
   * @param ledger The initial ledger instance
   * @param firstLedgerEntryId First ledger entry id (this is different from
   *                           HDFS transaction id!) to read from the ledger.
   */
  public BookKeeperJournalInputStream(LedgerHandle ledger,
      long firstLedgerEntryId) {
    this.ledger = ledger;
    this.firstLedgerEntryId = firstLedgerEntryId;
    maxLedgerEntryIdSeen = ledger.getLastAddConfirmed();
    currentStreamState = new InputStreamState();
    currentStreamState.setNextLedgerEntryId(firstLedgerEntryId);
  }

  /**
   * Read a single byte by delegating to {@link #read(byte[], int, int)}.
   *
   * @return The byte read as an unsigned value in the range 0..255, or
   *         {@code -1} if no byte could be read (which includes having
   *         reached the current end of the ledger)
   * @throws IOException If there is an error reading from BookKeeper
   */
  @Override
  public int read() throws IOException {
    byte[] data = new byte[1];
    if (read(data, 0, 1) != 1) {
      return -1;
    }
    // Mask to an unsigned value: returning the raw (signed) byte would turn
    // 0xFF into -1, which callers would mistake for end-of-stream.
    return data[0] & 0xff;
  }

  /**
   * Open a stream over the next ledger entry and advance the next entry id.
   * Called once the end of the current entry stream has been reached.
   *
   * @return A stream over the next entry, or {@code null} if the next entry
   *         id is beyond the highest entry id known for the ledger (or if
   *         BookKeeper returned no entry for it)
   * @throws IOException If reading the entry fails or is interrupted
   */
  private InputStream nextEntryStream() throws IOException {
    long nextLedgerEntryId = currentStreamState.getNextLedgerEntryId();
    if (nextLedgerEntryId > maxLedgerEntryIdSeen) {
      // Refresh our view of the tail before concluding there is nothing left
      updateMaxLedgerEntryIdSeen();
      if (nextLedgerEntryId > maxLedgerEntryIdSeen) {
        // Return null if we've reached the end of the ledger: we can not
        // read beyond the end of the ledger and it is up to the caller to
        // either find the new "tail" of the ledger (if the ledger is in-
        // progress) or open the next ledger (if the ledger is finalized)
        if (LOG.isDebugEnabled()) {
          LOG.debug("Requesting to ledger entryId " + nextLedgerEntryId +
              ", but "+ " maxLedgerEntryIdSeen is " + maxLedgerEntryIdSeen +
              ", ledger length is " + ledger.getLength());
        }
        return null;
      }
    }

    try {
      // Request exactly one entry: first id == last id
      Enumeration<LedgerEntry> entries =
          ledger.readEntries(nextLedgerEntryId, nextLedgerEntryId);
      // The id is advanced as soon as the read call returns, before the
      // result is inspected
      currentStreamState.incrementNextLedgerEntryId();
      if (entries.hasMoreElements()) {
        LedgerEntry entry = entries.nextElement();
        if (entries.hasMoreElements()) {
          throw new IllegalStateException("More than one entry retrieved!");
        }
        currentStreamState.setOffsetInEntry(0);
        return entry.getEntryInputStream();
      }
    } catch (BKException e) {
      throw new IOException("Unrecoverable BookKeeper error reading entry " +
          nextLedgerEntryId, e);
    } catch (InterruptedException e) {
      // Preserve the interrupt status for code further up the stack
      Thread.currentThread().interrupt();
      throw new IOException("Interrupted reading BookKeeper entry " +
          nextLedgerEntryId, e);
    }
    return null;
  }

  /**
   * Change the underlying ledger object in order to be able to correctly
   * determine the "tail" of the ledger.
   * @param ledger The new ledger object
   * @throws IOException If refreshing the maximum entry id fails
   */
  public void resetLedger(LedgerHandle ledger) throws IOException {
    this.ledger = ledger;
    updateMaxLedgerEntryIdSeen();
  }

  /**
   * Set <code>maxLedgerEntryIdSeen</code> to the maximum of last confirmed
   * entry-id from a quorum of bookies and last confirmed entry-id from
   * metadata stored in ZooKeeper. The reason is to handle the case of
   * when a ledger becomes finalized mid-flight: in this case last confirmed
   * entry-id that is read from a quorum is no longer reliable, but a reliable
   * last-confirmed entry-id is now available in ZooKeeper metadata which is
   * updated when a ledger is finalized.
   *
   * <p>The stored maximum is only ever raised by this method, never lowered.
   *
   * @throws IOException If there's an error talking to BookKeeper
   *                     or ZooKeeper
   */
  private void updateMaxLedgerEntryIdSeen() throws IOException {
    long lcFromMetadata = ledger.getLastAddConfirmed();
    long lcFromQuorum;
    try {
      lcFromQuorum = ledger.readLastConfirmed();
    } catch (BKException e) {
      // NOTE(review): bkException() is defined elsewhere and presumably
      // always throws IOException; the return keeps lcFromQuorum definitely
      // assigned for the compiler.
      bkException("Unable to read last confirmed ledger entry id "
          + "from ledger " + ledger.getId(), e);
      return;
    } catch (InterruptedException e) {
      // NOTE(review): interruptedException() is defined elsewhere and
      // presumably always throws IOException; see the note above.
      interruptedException("Interrupted reading last confirmed ledger " +
          "entry id from ledger " + ledger.getId(), e);
      return;
    }
    long newMaxLedgerEntryIdSeen = Math.max(lcFromMetadata, lcFromQuorum);
    if (newMaxLedgerEntryIdSeen > maxLedgerEntryIdSeen) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Resetting maxLedgerEntryIdSeen from " + maxLedgerEntryIdSeen +
            " to " + newMaxLedgerEntryIdSeen);
      }
      maxLedgerEntryIdSeen = newMaxLedgerEntryIdSeen;
    }
  }

  /**
   * Preserve the state associated with the specified reader position
   * (meant for use with {@link #position(long)}).
   * @param position The external reader position associated with the
   *                 current ledger state.
   */
  public void savePosition(long position) {
    currentStreamState.setReaderPosition(position);
    savedStreamState = InputStreamState.copyOf(currentStreamState);
  }

  /**
   * "Go back" to the specified reader position by resetting the reader to
   * a saved state associated with that position.
   *
   * <p>Three cases are handled:
   * <ul>
   *   <li>{@code position == 0}: rewind to the first ledger entry;</li>
   *   <li>no saved state, or the saved state belongs to a different reader
   *       position: rewind to the start and re-read {@code position} bytes
   *       ("brute force");</li>
   *   <li>the saved state matches {@code position}: restore the entry id
   *       from the saved state and skip within that entry if needed.</li>
   * </ul>
   *
   * @param position The reader position we want to go back to
   * @throws IllegalArgumentException If an illegal position is specified
   * @throws IllegalStateException If fewer bytes than required could be
   *                               skipped to reach the position
   * @throws IOException If there is an error communicating with BookKeeper
   */
  public void position(long position) throws IOException {
    if (position == 0) {
      currentStreamState.setNextLedgerEntryId(firstLedgerEntryId);
      currentStreamState.setOffsetInEntry(0);
      entryStream = null;
    } else if (savedStreamState == null ||
        position != savedStreamState.getReaderPosition()) {
      // Seek to an arbitrary position through "brute force"
      if (position > Integer.MAX_VALUE) {
        throw new IllegalArgumentException("Asked to position to " + position +
            ", but can only \"brute-force\" skip up to " + Integer.MAX_VALUE);
      }
      position(0);
      skip(position, (int) position);
    } else {
      // savedStream != null && position == savedStream.getReaderPosition()
      int bytesToSkip = 0;
      if (savedStreamState.getOffsetInLedger() > position) {
        // Since reading from the input stream is buffered, we usually will
        // read further into the ledger than the reader has actually
        // read into. In this case we will need to find out exactly *what*
        // position within the ledger entry matches with the reader's last
        // known good position.
        long entryStartPosition = savedStreamState.getOffsetInLedger() -
            savedStreamState.getOffsetInEntry();
        bytesToSkip = (int) (position - entryStartPosition);
      } else if (savedStreamState.getOffsetInLedger() < position) {
        throw new IllegalArgumentException("Saved offset in ledger (" +
            savedStreamState.getOffsetInLedger() + ") < position(" + position
            + ")");
      }
      // The saved "next" id points one past the entry that was being read
      // when the state was saved, so step back by one (unless nothing had
      // been read yet).
      long nextLedgerEntryId = savedStreamState.getNextLedgerEntryId() == firstLedgerEntryId ?
          firstLedgerEntryId : (savedStreamState.getNextLedgerEntryId() - 1);
      currentStreamState.setNextLedgerEntryId(nextLedgerEntryId);
      if (bytesToSkip > 0) {
        // Force the entry to be re-fetched, then discard bytes up to position
        entryStream = null;
        skip(position, bytesToSkip);
      } else {
        // NOTE(review): the entry id is stepped back a second time here
        // before the entry is re-opened; the intent is not evident from
        // this file alone -- confirm against callers/tests before changing.
        if (currentStreamState.getNextLedgerEntryId() > 0) {
          currentStreamState.setNextLedgerEntryId(currentStreamState.getNextLedgerEntryId() - 1);
        }
        entryStream = nextEntryStream();
      }
    }
    currentStreamState.setOffsetInLedger(position);
  }

  /**
   * Read and discard {@code bytesToSkip} bytes so that the stream ends up at
   * the position last consumed by the reader.
   *
   * @param position The target position (used for log/error messages only)
   * @param bytesToSkip Number of bytes to read and discard
   * @throws IllegalStateException If fewer than {@code bytesToSkip} bytes
   *                               could be read
   * @throws IOException If there is an error reading from BookKeeper
   */
  private void skip(long position, int bytesToSkip) throws IOException {
    // Read further into the ledger such that our position matches the
    // position last consumed by the reader. Discard the data read.
    LOG.info("Attempting to skip " + bytesToSkip + " bytes to get to position "
        + position);
    byte[] data = new byte[bytesToSkip];
    int skipped;
    if ((skipped = read(data, 0, bytesToSkip)) != bytesToSkip) {
      throw new IllegalStateException("Could not skip to position " +
          position + ", tried to read " + bytesToSkip +
          " but only read " + skipped + " bytes!");
    }
  }

  /**
   * Read up to {@code len} bytes, crossing ledger entry boundaries as
   * needed, and advance the offset-in-ledger by the number of bytes read.
   *
   * @param buf Destination buffer
   * @param off Offset in {@code buf} at which to start storing bytes
   * @param len Maximum number of bytes to read
   * @return The number of bytes actually read; this is less than
   *         {@code len} (possibly {@code 0}) when the current end of the
   *         ledger is reached. This method does not return {@code -1}.
   * @throws IOException If there is an error reading from BookKeeper
   */
  @Override
  public int read(byte[] buf, int off, int len) throws IOException {
    int bytesRead = readInternal(buf, off, len);
    currentStreamState.advanceOffsetInLedger(bytesRead);
    return bytesRead;
  }

  /**
   * Fill {@code buf} from the current entry stream, opening subsequent
   * entries as each one is exhausted. Updates the offset-in-entry but not
   * the offset-in-ledger (the caller does that).
   *
   * @return The number of bytes read, {@code 0} if nothing is available
   */
  private int readInternal(byte[] buf, int off, int len) throws IOException {
    if (maxLedgerEntryIdSeen == -1) {
      // If this is an in-progress ledger, find out the true "tail" of the
      // ledger
      maxLedgerEntryIdSeen = ledger.getLastAddConfirmed();
      if (maxLedgerEntryIdSeen == -1) { // Nothing has been added to the ledger
        return 0;
      }
    }
    if (entryStream == null) {
      // If we are the end of the current entry, fetch the next one
      entryStream = nextEntryStream();
      if (entryStream == null) { // We are the end of the ledger
        return 0;
      }
    }
    // The calling classes may want to read a sequence of bytes that is
    // spread across multiple ledger entries. In this case, we will need to
    // in a loop: maintain the number of bytes read so far (the offset into
    // the buffer), when we reach the end of the current ledger entry, use
    // nextEntryStream() to begin reading the next ledger entry
    int bytesReadTotal = 0;
    while (bytesReadTotal < len) {
      int bytesReadLast = entryStream.read(buf, off + bytesReadTotal,
          len - bytesReadTotal);
      if (bytesReadLast == -1) {
        // Current entry exhausted: move on, or stop at the end of the ledger
        entryStream = nextEntryStream();
        if (entryStream == null) {
          return bytesReadTotal;
        }
      } else {
        currentStreamState.advanceOffsetInEntry(bytesReadLast);
        bytesReadTotal += bytesReadLast;
      }
    }
    return bytesReadTotal;
  }

  /**
   * @return The length reported by the underlying ledger handle
   */
  public long getLedgerLength() {
    return ledger.getLength();
  }

  /**
   * @return The string representation of the underlying ledger handle
   */
  public String getLedgerName() {
    return ledger.toString();
  }

  /**
   * Close the underlying ledger handle.
   *
   * @throws IOException If closing the ledger fails or is interrupted; in
   *                     the latter case the thread's interrupt status is
   *                     restored before throwing
   */
  @Override
  public void close() throws IOException {
    try {
      ledger.close();
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new IOException("Interrupted during close()", e);
    } catch (BKException e) {
      throw new IOException("BookKeeper error during close()", e);
    }
  }
}