/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.hdfs.metrics.DFSClientMetrics;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.BlockPathInfo;
import org.apache.hadoop.hdfs.server.datanode.BlockInlineChecksumReader;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.util.DataChecksum;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
/**
* Block reader that reads a block stored on the local file system in the
* inline checksum format, optionally verifying checksums while reading.
*/
public class BlockReaderLocalInlineChecksum extends BlockReaderLocalBase {
// Whether checksums are verified while reading. Stays false unless the
// constructor taking a verifyChecksum argument sets it.
private boolean needVerifyChecksum = false;
// Channel on the local data file; all seeks, reads and size queries of this
// reader go through it.
private final FileChannel dataFileChannel; // reader for the data file
// Descriptor of the data file, passed to NativeIO.posixFadviseIfPossible
// when clearOsBuffer is set.
private final FileDescriptor dataFileDescriptor;
/**
 * Creates a reader that does not verify checksums.
 *
 * <p>For a sequential (non-positional) reader the data file channel is
 * moved to the file position that corresponds to {@code startOffset}; a
 * positional reader leaves the channel where it is.
 *
 * @param conf
 *          configuration handed to the base class
 * @param hdfsfile
 *          file name of the HDFS file
 * @param block
 *          the block to read
 * @param startOffset
 *          start offset to read (block offset)
 * @param length
 *          length of the block
 * @param pathinfo
 *          local path information of the block
 * @param metrics
 *          client metrics handed to the base class
 * @param checksum
 *          supplies bytes-per-checksum and checksum size of the file layout
 * @param dataFileChannel
 *          file channel of the data file
 * @param dataFileDescriptor
 *          file descriptor of the data file
 * @param clearOsBuffer
 *          whether to clear the OS buffer after a read
 * @param positionalReadMode
 *          whether the reader is used for positional reads
 * @throws IOException
 *           if the channel cannot be positioned
 */
BlockReaderLocalInlineChecksum(Configuration conf, String hdfsfile,
    Block block, long startOffset, long length, BlockPathInfo pathinfo,
    DFSClientMetrics metrics, DataChecksum checksum,
    FileChannel dataFileChannel, FileDescriptor dataFileDescriptor,
    boolean clearOsBuffer, boolean positionalReadMode) throws IOException {
  super(conf, hdfsfile, block, startOffset, length, pathinfo, metrics,
      clearOsBuffer, positionalReadMode);
  this.bytesPerChecksum = checksum.getBytesPerChecksum();
  this.checksumSize = checksum.getChecksumSize();
  this.checksum = checksum;
  this.dataFileChannel = dataFileChannel;
  this.dataFileDescriptor = dataFileDescriptor;

  if (positionalReadMode) {
    // Positional reads seek on every call; no initial position is needed.
    return;
  }

  // Translate the block offset into a file position: locate the start of
  // the enclosing chunk in the file, then add the offset inside the chunk.
  final long offsetWithinChunk = startOffset % bytesPerChecksum;
  final long chunkStartInBlock = startOffset - offsetWithinChunk;
  final long chunkStartInFile = BlockInlineChecksumReader
      .getPosFromBlockOffset(chunkStartInBlock, bytesPerChecksum,
          checksumSize);
  this.dataFileChannel.position(chunkStartInFile + offsetWithinChunk);
}
/**
 * Constructor for the case when checksum may need to be verified.
 *
 * <p>For a sequential (non-positional) reader this validates the requested
 * range against the block length, re-checking the on-disk file length when
 * the range exceeds the cached length (the block may still be growing), and
 * then positions the data file channel at the start of the chunk that
 * contains {@code startOffset}.
 *
 * @param conf
 *          configuration handed to the base class
 * @param hdfsfile
 *          file name of the HDFS file
 * @param block
 *          the block to read
 * @param startOffset
 *          start offset of the block to read
 * @param length
 *          length of the block
 * @param pathinfo
 *          object for local path of the block; its cached length is
 *          updated if the file on disk turns out to be longer
 * @param metrics
 *          client metrics handed to the base class
 * @param checksum
 *          checksum object describing the inline checksum layout
 * @param verifyChecksum
 *          whether need to verify checksum when read
 * @param dataFileChannel
 *          file channel of local file for the block
 * @param dataFileDescriptor
 *          file descriptor of the opened local block file
 * @param clearOsBuffer
 *          whether clear OS cache after reading
 * @param positionalReadMode
 *          whether it is for positional read
 * @throws IOException
 *           if the requested range does not fit the block, or the channel
 *           cannot be positioned
 */
BlockReaderLocalInlineChecksum(Configuration conf, String hdfsfile,
    Block block, long startOffset, long length, BlockPathInfo pathinfo,
    DFSClientMetrics metrics, DataChecksum checksum, boolean verifyChecksum,
    FileChannel dataFileChannel, FileDescriptor dataFileDescriptor,
    boolean clearOsBuffer, boolean positionalReadMode) throws IOException {
  super(conf, hdfsfile, block, startOffset, length, pathinfo, metrics,
      checksum, verifyChecksum, clearOsBuffer, positionalReadMode);
  sum = null;
  this.dataFileChannel = dataFileChannel;
  this.dataFileDescriptor = dataFileDescriptor;
  this.needVerifyChecksum = verifyChecksum;
  long blockLength = pathinfo.getNumBytes();
  if (positionalReadMode) {
    // We don't need to set initial offsets of the file if
    // the reader is for positional reads.
    return;
  }
  // if the requested size exceeds the currently known length of the file
  // then check the blockFile to see if its length has grown. This can
  // occur if the file is being concurrently written to while it is being
  // read too. If the blockFile has grown in size, then update the new
  // size in our cache.
  if (startOffset > blockLength || (length + startOffset) > blockLength) {
    File blkFile = new File(pathinfo.getBlockPath());
    long diskFileLength = blkFile.length();
    // The third argument is the number of data bytes per checksum chunk
    // (as in getBlockLength()), not the size of a checksum.
    long newlength = BlockInlineChecksumReader.getBlockSizeFromFileLength(
        diskFileLength, checksum.getChecksumType(),
        checksum.getBytesPerChecksum());
    LOG.warn("BlockReaderLocal found short block " + blkFile
        + " requested offset " + startOffset + " length " + length
        + " but known size of block is " + blockLength + ", size on disk is "
        + newlength);
    if (newlength > blockLength) {
      blockLength = newlength;
      pathinfo.setNumBytes(newlength);
    }
  }
  long endOffset = blockLength;
  if (startOffset < 0 || startOffset > endOffset
      || (length + startOffset) > endOffset) {
    String msg = " Offset " + startOffset + " and length " + length
        + " don't match block " + block + " ( blockLen " + endOffset + " )";
    LOG.warn("BlockReaderLocal requested with incorrect offset: " + msg);
    throw new IOException(msg);
  }
  // Reading always starts at a chunk boundary so checksums can be verified.
  firstChunkOffset = (startOffset - (startOffset % bytesPerChecksum));
  // seek to the right offsets
  if (firstChunkOffset > 0) {
    long offsetInFile = BlockInlineChecksumReader.getPosFromBlockOffset(
        firstChunkOffset, bytesPerChecksum, checksumSize);
    dataFileChannel.position(offsetInFile);
  }
  lastChunkOffset = firstChunkOffset;
  lastChunkLen = -1;
}
/**
 * Computes the block length from the current size of the local data file,
 * using {@link BlockInlineChecksumReader#getBlockSizeFromFileLength}.
 *
 * @return the block size derived from the data file size
 * @throws IOException
 *           if the size of the data file cannot be obtained
 */
public long getBlockLength() throws IOException {
  final long dataFileSize = dataFileChannel.size();
  return BlockInlineChecksumReader.getBlockSizeFromFileLength(dataFileSize,
      checksum.getChecksumType(), bytesPerChecksum);
}
/**
* Read from the current position of the data file channel.
*
* The span of the file covering all requested chunks (data plus the inline
* checksums between them) is read with a single call into a temporary
* buffer. The chunks are then copied one by one into the output buffer,
* verifying each chunk's checksum first when verification is enabled.
*
* @param len
* number of bytes are requested; reduced to the space left in
* outBuffer after outStartPos, and to the bytes left in the block
* @param skipBytes
* first number of bytes shouldn't go to user buffer. They need to be
* read only for checksum verification purpose.
* @param outBuffer
* output buffer
* @param outStartPos
* starting position to fill the output buffer
* @return how many bytes are actually read.
* @throws IOException
* if checksum verification is enabled while the channel is
* positioned in the middle of a chunk, if a chunk fails checksum
* comparison (ChecksumException), or if reading the file fails
*/
private int read(int len, int skipBytes, byte[] outBuffer, int outStartPos)
throws IOException {
// Never copy more than fits into the output buffer.
if (outBuffer.length - outStartPos < len) {
len = outBuffer.length - outStartPos;
}
// Calculate current block size from current file position: convert the
// chunk-aligned part of the file position to a block offset and add the
// offset inside the current chunk.
// NOTE(review): this treats the position inside an on-disk chunk as a data
// offset, i.e. it assumes the channel is never positioned inside a
// checksum -- confirm.
long filePos = dataFileChannel.position();
long blockPos = BlockInlineChecksumReader.getBlockSizeFromFileLength(
filePos - filePos % (checksumSize + bytesPerChecksum), checksum.getChecksumType(),
bytesPerChecksum)
+ filePos % (checksumSize + bytesPerChecksum);
long startChunkOffset; // start offset in the current chunk
if (blockPos % bytesPerChecksum != 0) {
// Current position is not the beginning of a chunk. It is only
// allowed if checksum is not verified.
if (needVerifyChecksum) {
throw new IOException(
"Cannot read from middle of a chunk and verify checksum.");
}
startChunkOffset = blockPos % bytesPerChecksum;
} else {
startChunkOffset = 0;
}
// Get current block size
long blockSize = getBlockLength();
int totalLen = len + skipBytes; // total bytes to read, including the bytes to skip
if (totalLen + blockPos > blockSize) {
// If the number of bytes requests is more than bytes available,
// shrink the length to read up to the end of the file.
// NOTE(review): if blockPos >= blockSize, totalLen becomes <= 0 here and
// nothing below guards that case explicitly -- confirm callers never read
// at or after the end of the block.
totalLen = (int) (blockSize - blockPos);
len = totalLen - skipBytes;
}
long lastChunkSize; // size of the last chunk to read
long endBlockPos = blockPos + totalLen;
if (endBlockPos % bytesPerChecksum != 0
&& blockSize % bytesPerChecksum != 0
&& endBlockPos > blockSize - (blockSize % bytesPerChecksum)) {
// This is the case that the last chunk of the file is partial and the
// read request needs to read to the last chunk (not necessarily the full
// chunk is requested).
lastChunkSize = blockSize % bytesPerChecksum;
} else {
lastChunkSize = bytesPerChecksum;
}
// Number of chunks touched by the request, counting a first chunk that is
// entered at startChunkOffset.
int numChunks = (totalLen + (int) startChunkOffset - 1) / bytesPerChecksum + 1;
// Bytes to fetch from the file: every chunk but the last with its checksum,
// plus the data of the last chunk, minus the part of the first chunk that
// lies before the current position.
int totalBytesToRead = (int) ((bytesPerChecksum + checksumSize)
* (numChunks - 1) + lastChunkSize - startChunkOffset);
// The checksum that follows the last chunk is fetched as well when it must
// be verified, or when the request length is a whole number of chunks.
if (needVerifyChecksum || totalLen % bytesPerChecksum == 0) {
totalBytesToRead += checksumSize;
}
// We first copy to a temp buffer and then to
// final user buffer to reduce number of file system calls.
// We pay a new byte array allocation and another mem copy for it.
ByteBuffer tempBuffer = ByteBuffer.allocate(totalBytesToRead);
IOUtils.readFileChannelFully(dataFileChannel, tempBuffer, 0,
totalBytesToRead, true);
tempBuffer.flip();
// Up to here, we calculated what we need to read and how:
// numChunks needs to be read. Obviously, all chunks other than the first
// chunk and the last chunk need to be read fully directly to user buffer.
//
// For the first chunk:
// data needs to read from startChunkOffset to the end of the chunk. If
// there is only one chunk, the lastChunkSize - startChunkOffset needs to be
// read. Otherwise, it is bytesPerChecksum - startChunkOffset.
// The first skipBytes bytes will be skipped and from the next bytes will be
// filled to user buffer, until the expected number of bytes are filled, or
// the end of the chunk.
//
// For the last chunk (for more than one chunk):
// there are lastChunkSize bytes in the last chunk. If checksum checking is
// required, all the bytes need to be read. But not necessarily all the
// bytes are needed to user buffer. User buffer is only filled up to len.
int remain = totalLen;
int bytesFilledBuffer = 0; // how many bytes are filled into user's buffer
for (int i = 0; i < numChunks; i++) {
assert remain > 0;
long chunkActualRead; // bytes of this chunk copied into the user's buffer
int endChunkOffsetToRead; // bytes of this chunk counted against the
// request, including any head bytes to skip
int lenToRead; // how many bytes of this chunk are covered by its checksum
int headBytesToSkip = 0; // extra bytes not to copied to user's buffer.
if (needVerifyChecksum) {
endChunkOffsetToRead = (remain > bytesPerChecksum) ? bytesPerChecksum
: remain;
// The case that checksum needs to be verified.
//
if (i == numChunks - 1) {
// It's the last chunk and checksum needs to be verified. (could be the
// first one in the same time)
// We need to read lastChunkSize bytes.
lenToRead = (int) lastChunkSize;
} else {
lenToRead = bytesPerChecksum;
}
if (i == 0 && skipBytes > 0) {
// checksum needs to be verified and there is extra bytes need to be
// skipped. The whole chunk is checksummed from the temp buffer first and
// only the bytes after the skipped head are copied to the user buffer.
headBytesToSkip = skipBytes;
}
} else {
// Case that checksum doesn't need to be verified.
//
if (i == numChunks - 1) {
endChunkOffsetToRead = remain;
} else if (i == 0) {
// It's the first chunk. Bytes need to be read are the chunk size -
// startChunkOffset.
endChunkOffsetToRead = bytesPerChecksum - (int) startChunkOffset;
} else {
endChunkOffsetToRead = bytesPerChecksum;
}
lenToRead = endChunkOffsetToRead;
}
if (needVerifyChecksum) {
// verify checksum: compute it over the chunk data in the temp buffer and
// compare with the stored checksum that directly follows the data.
checksum.reset();
checksum.update(tempBuffer.array(), tempBuffer.position(), lenToRead);
if (!checksum.compare(tempBuffer.array(), tempBuffer.position()
+ lenToRead)) {
throw new ChecksumException("Checksum failed at "
+ (blockPos + len - remain), len);
}
}
// Step over the head bytes that were only needed for the checksum.
if (headBytesToSkip > 0) {
tempBuffer.position(tempBuffer.position() + headBytesToSkip);
}
chunkActualRead = endChunkOffsetToRead - headBytesToSkip;
tempBuffer.get(outBuffer, outStartPos + bytesFilledBuffer,
(int) chunkActualRead);
// Step over the inline checksum that separates this chunk from the next.
if (i != numChunks - 1) {
tempBuffer.position(tempBuffer.position() + checksumSize);
}
bytesFilledBuffer += chunkActualRead;
remain -= endChunkOffsetToRead;
}
return bytesFilledBuffer;
}
/**
 * Sequential read from the current position of the data file channel.
 *
 * @param buf
 *          buffer to fill
 * @param off
 *          position in {@code buf} to start filling at
 * @param len
 *          number of bytes requested
 * @return number of bytes placed into {@code buf}
 * @throws IOException
 *           if this reader was created for positional reads, or the read
 *           fails
 */
private int readNonPositional(byte[] buf, int off, int len)
    throws IOException {
  if (!positionalReadMode) {
    // Nothing has to be skipped for a sequential read.
    return read(len, 0, buf, off);
  }
  throw new IOException(
      "Try to do sequential read using a block reader for positional read.");
}
/**
 * Sequential read into {@code buf}.
 *
 * <p>Without checksum verification the bytes are read directly from the
 * data file; with verification the read is delegated to the base class
 * (which is expected to call back into {@code readChunk}). When
 * {@code clearOsBuffer} is set, the OS is advised afterwards that the part
 * of the data file consumed by this call is no longer needed.
 *
 * @param buf
 *          buffer to fill
 * @param off
 *          position in {@code buf} to start filling at
 * @param len
 *          number of bytes requested
 * @return the number of bytes read, as reported by the underlying read
 * @throws IOException
 *           if the reader is in positional mode or the read fails
 */
@Override
public synchronized int read(byte[] buf, int off, int len) throws IOException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("BlockChecksumFileSystem read off " + off + " len " + len);
  }
  metrics.readsFromLocalFile.inc();
  // Remember where this read starts in the data file. The cache advice
  // below must describe a range of the file; "off" and "len" describe the
  // caller's buffer and say nothing about file positions.
  final long filePosBeforeRead = clearOsBuffer ? dataFileChannel.position()
      : 0L;
  int byteRead;
  if (!needVerifyChecksum) {
    byteRead = readNonPositional(buf, off, len);
    updateStatsAfterRead(byteRead);
  } else {
    byteRead = super.read(buf, off, len);
  }
  if (clearOsBuffer) {
    // drop the pages this read went through from the OS buffer cache
    final long bytesConsumedInFile = dataFileChannel.position()
        - filePosBeforeRead;
    // A zero length would mean "up to the end of the file" for
    // posix_fadvise, so advise only when something was actually consumed.
    if (bytesConsumedInFile > 0) {
      NativeIO.posixFadviseIfPossible(dataFileDescriptor, filePosBeforeRead,
          bytesConsumedInFile, NativeIO.POSIX_FADV_DONTNEED);
    }
  }
  return byteRead;
}
/**
 * Positional read: reads up to {@code len} bytes starting at block offset
 * {@code pos} into {@code buf}.
 *
 * <p>With checksum verification the channel is moved to the start of the
 * chunk containing {@code pos} and the bytes before {@code pos} are read
 * only for verification. Without verification the channel is moved
 * straight to the byte at {@code pos}.
 *
 * @param pos
 *          offset in the block to read from
 * @param buf
 *          buffer to fill
 * @param off
 *          position in {@code buf} to start filling at
 * @param len
 *          number of bytes requested
 * @return number of bytes placed into {@code buf}
 * @throws IOException
 *           if this reader was created for sequential reads, or the read
 *           fails
 */
public synchronized int read(long pos, byte[] buf, int off, int len)
    throws IOException {
  if (!positionalReadMode) {
    throw new IOException(
        "Try to do positional read using a block reader forsequantial read.");
  }
  final int offsetInChunk = (int) (pos % bytesPerChecksum);
  final long chunkStartInFile = BlockInlineChecksumReader
      .getPosFromBlockOffset(pos - offsetInChunk, bytesPerChecksum,
          checksumSize);

  final long seekTarget;
  final int headBytesToSkip;
  if (needVerifyChecksum) {
    // Whole chunks are needed for verification; discard the head later.
    seekTarget = chunkStartInFile;
    headBytesToSkip = offsetInChunk;
  } else {
    seekTarget = chunkStartInFile + offsetInChunk;
    headBytesToSkip = 0;
  }
  dataFileChannel.position(seekTarget);
  return read(len, headBytesToSkip, buf, off);
}
/**
 * Skips up to {@code n} bytes of block data.
 *
 * <p>With checksum verification the skip is delegated to the base class.
 * Without it, the data file channel is repositioned directly: the current
 * file position is converted to a block offset, advanced by {@code n}
 * (but not beyond the end of the block), and converted back to a file
 * position.
 *
 * @param n
 *          number of block bytes to skip
 * @return number of block bytes actually skipped; 0 if {@code n} is not
 *         positive or the reader is already at the end of the block
 *         (non-verifying case)
 * @throws IOException
 *           if the channel position or size cannot be read or changed
 */
@Override
public synchronized long skip(long n) throws IOException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("BlockChecksumFileSystem skip " + n);
  }
  if (needVerifyChecksum) {
    return super.skip(n);
  }
  if (n <= 0) {
    return 0;
  }
  // File position -> block offset, the same way the private read() does it:
  // convert the chunk-aligned part of the position and add the offset
  // inside the current chunk.
  final long chunkSizeInFile = bytesPerChecksum + checksumSize;
  final long currentPos = dataFileChannel.position();
  final long offsetInFileChunk = currentPos % chunkSizeInFile;
  final long currentBlockPos = BlockInlineChecksumReader
      .getBlockSizeFromFileLength(currentPos - offsetInFileChunk,
          checksum.getChecksumType(), bytesPerChecksum)
      + offsetInFileChunk;

  // Never move past the end of the block. Clamping the distance (rather
  // than the target) also avoids overflow for very large n.
  final long remainingInBlock = getBlockLength() - currentBlockPos;
  if (remainingInBlock <= 0) {
    return 0;
  }
  final long skipped = Math.min(n, remainingInBlock);
  final long newBlockPos = currentBlockPos + skipped;

  // Block offset -> file position: start of the enclosing chunk in the
  // file plus the data bytes inside that chunk.
  final long partialChunk = newBlockPos % bytesPerChecksum;
  final long newPos = BlockInlineChecksumReader.getPosFromBlockOffset(
      newBlockPos - partialChunk, bytesPerChecksum, checksumSize)
      + partialChunk;
  dataFileChannel.position(newPos);
  return skipped;
}
/**
 * Reads the next chunk of the block into {@code buf}.
 *
 * <p>Up to {@code bytesPerChecksum} bytes are requested regardless of
 * {@code len}. A short read marks the end of stream, and the following
 * call returns -1.
 *
 * @param pos
 *          chunk position relative to the first chunk of this reader; must
 *          match the position this reader has reached
 * @param buf
 *          buffer to fill
 * @param offset
 *          position in {@code buf} to start filling at
 * @param len
 *          only logged; not used to size the read
 * @param checksumBuf
 *          must be exactly {@code checksumSize} bytes long; it is not
 *          written to by this method
 * @return number of bytes read, or -1 on the first call after the end of
 *         stream was reached
 * @throws IOException
 *           on a repeated call after -1 was returned, a checksum buffer of
 *           the wrong size, a position mismatch, or a failed read
 */
@Override
protected synchronized int readChunk(long pos, byte[] buf, int offset,
    int len, byte[] checksumBuf) throws IOException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Reading chunk from position " + pos + " at offset " + offset
        + " with length " + len);
  }

  if (eos) {
    // startOffset doubles as the "end of stream already reported" marker.
    if (startOffset < 0) {
      // This is mainly for debugging. can be removed.
      throw new IOException("BlockRead: already got EOS or an error");
    }
    startOffset = -1;
    return -1;
  }

  if (checksumSize != checksumBuf.length) {
    throw new IOException("Cannot read checksum into provided buffer. "
        + "The buffer must be exactly '" + checksumSize
        + "' bytes long to hold the checksum bytes.");
  }

  final long requestedChunkOffset = pos + firstChunkOffset;
  if (requestedChunkOffset != lastChunkOffset) {
    throw new IOException("Mismatch in pos : " + pos + " + "
        + firstChunkOffset + " != " + lastChunkOffset);
  }

  final int bytesRead = readNonPositional(buf, offset, bytesPerChecksum);
  if (bytesRead < bytesPerChecksum) {
    // A short chunk can only be the last one.
    eos = true;
  }
  lastChunkLen = bytesRead;
  lastChunkOffset += bytesRead;
  return bytesRead;
}
/**
 * Channel Transfer is not possible for inline checksum. To keep backward
 * compatible, the data is obtained through the normal read() methods
 * instead: a positional read from block offset 0, or a sequential read,
 * depending on the mode of this reader.
 *
 * <p>NOTE(review): the number of bytes reported by read() is ignored and
 * the returned buffer always spans {@code length} bytes -- confirm callers
 * do not rely on it being completely filled.
 *
 * @return a newly allocated buffer of {@code length} bytes holding the
 *         bytes that were read
 * @throws IOException
 *           if the read fails
 */
public ByteBuffer readAll() throws IOException {
  final int size = (int) length;
  final ByteBuffer result = ByteBuffer.allocate(size);
  final byte[] backing = result.array();
  if (positionalReadMode) {
    read(0, backing, 0, size);
  } else {
    read(backing, 0, size);
  }
  return result;
}
/**
 * Closes the data file channel used by this reader.
 *
 * @throws IOException
 *           if closing the channel fails
 */
@Override
public synchronized void close() throws IOException {
  final boolean debugEnabled = LOG.isDebugEnabled();
  if (debugEnabled) {
    LOG.debug("BlockChecksumFileSystem close");
  }
  this.dataFileChannel.close();
}
}