/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import static org.apache.hadoop.hdfs.protocol.HdfsProtoUtil.vintPrefixed;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.nio.ByteBuffer;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.FSInputChecker;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil;
import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;
import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.ReadOpChecksumInfoProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.DataChecksum;

/**
 * @deprecated this is an old implementation that is being left around
 * in case any issues spring up with the new {@link RemoteBlockReader2}
 * implementation. It will be removed in the next release.
 */
@InterfaceAudience.Private
@Deprecated
public class RemoteBlockReader extends FSInputChecker implements BlockReader {

  Socket dnSock; // for now just sending the status code (e.g. checksumOk) after the read.
  private DataInputStream in;
  private DataChecksum checksum;

  /** offset in block of the last chunk received */
  private long lastChunkOffset = -1;
  private long lastChunkLen = -1;
  private long lastSeqNo = -1;

  /** offset in block where reader wants to actually read */
  private long startOffset;

  /** offset in block of first chunk - may be less than startOffset
      if startOffset is not chunk-aligned */
  private final long firstChunkOffset;

  private int bytesPerChecksum;
  private int checksumSize;

  /**
   * The total number of bytes we need to transfer from the DN.
   * This is the amount that the user has requested plus some padding
   * at the beginning so that the read can begin on a chunk boundary.
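   * For example (hypothetical numbers, to illustrate the formula in the
   * constructor below): with bytesPerChecksum = 512, a 100-byte read at
   * startOffset = 700 begins at firstChunkOffset = 512, so
   * bytesNeededToFinish = 100 + (700 - 512) = 288.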
   */
  private final long bytesNeededToFinish;

  private boolean eos = false;
  private boolean sentStatusCode = false;

  byte[] skipBuf = null;
  ByteBuffer checksumBytes = null;
  /** Amount of unread data in the current received packet */
  int dataLeft = 0;

  /* FSInputChecker interface */

  /* Same interface as java.io.InputStream#read(),
   * used by DFSInputStream#read().
   * This violates one rule when there is a checksum error:
   * "Read should not modify user buffer before successful read"
   * because it first reads the data into the user buffer and then checks
   * the checksum.
   */
  @Override
  public synchronized int read(byte[] buf, int off, int len)
      throws IOException {

    // This has to be set here, *before* the skip, since we can
    // hit EOS during the skip, in the case that our entire read
    // is smaller than the checksum chunk.
    boolean eosBefore = eos;

    // for the first read, skip the extra bytes at the front
    if (lastChunkLen < 0 && startOffset > firstChunkOffset && len > 0) {
      // Skip these bytes. But don't call this.skip()!
      int toSkip = (int)(startOffset - firstChunkOffset);
      if (skipBuf == null) {
        skipBuf = new byte[bytesPerChecksum];
      }
      if (super.read(skipBuf, 0, toSkip) != toSkip) {
        // should never happen
        throw new IOException("Could not skip required number of bytes");
      }
    }

    int nRead = super.read(buf, off, len);

    // if eos was set in the previous read, send a status code to the DN
    if (eos && !eosBefore && nRead >= 0) {
      if (needChecksum()) {
        sendReadResult(dnSock, Status.CHECKSUM_OK);
      } else {
        sendReadResult(dnSock, Status.SUCCESS);
      }
    }
    return nRead;
  }

  @Override
  public synchronized long skip(long n) throws IOException {
    /* How can we make sure we don't throw a ChecksumException, at least
     * in the majority of cases? This one throws.
     */
    if (skipBuf == null) {
      skipBuf = new byte[bytesPerChecksum];
    }

    long nSkipped = 0;
    while (nSkipped < n) {
      int toSkip = (int)Math.min(n - nSkipped, skipBuf.length);
      int ret = read(skipBuf, 0, toSkip);
      if (ret <= 0) {
        return nSkipped;
      }
      nSkipped += ret;
    }
    return nSkipped;
  }

  @Override
  public int read() throws IOException {
    throw new IOException("read() is not expected to be invoked. " +
                          "Use read(buf, off, len) instead.");
  }

  @Override
  public boolean seekToNewSource(long targetPos) throws IOException {
    /* Checksum errors are handled outside the BlockReader.
     * DFSInputStream does not always call 'seekToNewSource'. In the
     * case of pread(), it just tries a different replica without seeking.
     */
    return false;
  }

  @Override
  public void seek(long pos) throws IOException {
    throw new IOException("Seek() is not supported in BlockInputChecker");
  }

  @Override
  protected long getChunkPosition(long pos) {
    throw new RuntimeException("getChunkPosition() is not supported, " +
                               "since seek is not required");
  }

  /**
   * Makes sure that checksumBytes has enough capacity
   * and its limit is set to the number of checksum bytes that need
   * to be read.
   */
  private void adjustChecksumBytes(int dataLen) {
    int requiredSize =
        ((dataLen + bytesPerChecksum - 1) / bytesPerChecksum) * checksumSize;
    if (checksumBytes == null || requiredSize > checksumBytes.capacity()) {
      checksumBytes = ByteBuffer.wrap(new byte[requiredSize]);
    } else {
      checksumBytes.clear();
    }
    checksumBytes.limit(requiredSize);
  }

  @Override
  protected synchronized int readChunk(long pos, byte[] buf, int offset,
                                       int len, byte[] checksumBuf)
      throws IOException {
    // Read one chunk.
    if (eos) {
      // Already hit EOF
      return -1;
    }

    // Read one DATA_CHUNK.
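    // A sketch of the packet layout this method consumes, inferred from the
    // reads below (PacketHeader.readFields, then all checksums, then data):
    //
    //   +------------------+---------------------------+-----------------+
    //   | PacketHeader     | checksums: checksumSize   | data: dataLen   |
    //   | (seqno, offset,  | bytes per full or partial | bytes           |
    //   |  dataLen, last)  | chunk in the packet       |                 |
    //   +------------------+---------------------------+-----------------+
    //
    // The header and every checksum in the packet are read eagerly when a
    // new packet starts; the data bytes are then consumed chunk-by-chunk
    // across successive readChunk() calls while dataLeft > 0.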
    long chunkOffset = lastChunkOffset;
    if (lastChunkLen > 0) {
      chunkOffset += lastChunkLen;
    }

    // pos is relative to the start of the first chunk of the read.
    // chunkOffset is relative to the start of the block.
    // This makes sure that the read passed from FSInputChecker is for
    // the same chunk we expect to be reading from the DN.
    if ((pos + firstChunkOffset) != chunkOffset) {
      throw new IOException("Mismatch in pos : " + pos + " + " +
                            firstChunkOffset + " != " + chunkOffset);
    }

    // Read next packet if the previous packet has been read completely.
    if (dataLeft <= 0) {
      // Read the packet header.
      PacketHeader header = new PacketHeader();
      header.readFields(in);

      if (LOG.isDebugEnabled()) {
        LOG.debug("DFSClient readChunk got header " + header);
      }

      // Sanity check the lengths
      if (!header.sanityCheck(lastSeqNo)) {
        throw new IOException("BlockReader: error in packet header " +
                              header);
      }

      lastSeqNo = header.getSeqno();
      dataLeft = header.getDataLen();
      adjustChecksumBytes(header.getDataLen());
      if (header.getDataLen() > 0) {
        IOUtils.readFully(in, checksumBytes.array(), 0,
                          checksumBytes.limit());
      }
    }

    // Sanity checks
    assert len >= bytesPerChecksum;
    assert checksum != null;
    assert checksumSize == 0 || (checksumBuf.length % checksumSize == 0);

    int checksumsToRead, bytesToRead;

    if (checksumSize > 0) {
      // How many chunks are left in our packet - this is a ceiling
      // since we may have a partial chunk at the end of the file
      int chunksLeft = (dataLeft - 1) / bytesPerChecksum + 1;

      // How many chunks we can fit in the data buffer
      // - note this is a floor since we always read full chunks
      int chunksCanFit = Math.min(len / bytesPerChecksum,
                                  checksumBuf.length / checksumSize);

      // How many chunks should we read
      checksumsToRead = Math.min(chunksLeft, chunksCanFit);
      // How many bytes should we actually read
      bytesToRead = Math.min(
          checksumsToRead * bytesPerChecksum, // full chunks
          dataLeft);                          // in case we have a partial
    } else {
      // no checksum
      bytesToRead = Math.min(dataLeft, len);
      checksumsToRead = 0;
    }

    if (bytesToRead > 0) {
      // Assert we have enough space
      assert bytesToRead <= len;
      assert checksumBytes.remaining() >= checksumSize * checksumsToRead;
      assert checksumBuf.length >= checksumSize * checksumsToRead;
      IOUtils.readFully(in, buf, offset, bytesToRead);
      checksumBytes.get(checksumBuf, 0, checksumSize * checksumsToRead);
    }

    dataLeft -= bytesToRead;
    assert dataLeft >= 0;

    lastChunkOffset = chunkOffset;
    lastChunkLen = bytesToRead;

    // If there's no data left in the current packet after satisfying
    // this read, and we have satisfied the client read, we expect
    // an empty packet header from the DN to signify this.
    // Note that pos + bytesToRead may in fact be greater since the
    // DN finishes off the entire last chunk.
    if (dataLeft == 0 && pos + bytesToRead >= bytesNeededToFinish) {
      // Read header
      PacketHeader hdr = new PacketHeader();
      hdr.readFields(in);

      if (!hdr.isLastPacketInBlock() || hdr.getDataLen() != 0) {
        throw new IOException("Expected empty end-of-read packet! Header: " +
                              hdr);
      }

      eos = true;
    }

    if (bytesToRead == 0) {
      return -1;
    }

    return bytesToRead;
  }

  private RemoteBlockReader(String file, String bpid, long blockId,
      DataInputStream in, DataChecksum checksum, boolean verifyChecksum,
      long startOffset, long firstChunkOffset, long bytesToRead,
      Socket dnSock) {
    // Path is used only for printing block and file information in debug
    super(new Path("/blk_" + blockId + ":" + bpid + ":of:" + file)
              /* too non path-like? */,
          1, verifyChecksum,
          checksum.getChecksumSize() > 0 ?
              checksum : null,
          checksum.getBytesPerChecksum(),
          checksum.getChecksumSize());

    this.dnSock = dnSock;
    this.in = in;
    this.checksum = checksum;
    this.startOffset = Math.max(startOffset, 0);

    // The total number of bytes that we need to transfer from the DN is
    // the amount that the user wants (bytesToRead), plus the padding at
    // the beginning in order to chunk-align. Note that the DN may elect
    // to send more than this amount if the read starts/ends mid-chunk.
    this.bytesNeededToFinish = bytesToRead + (startOffset - firstChunkOffset);

    this.firstChunkOffset = firstChunkOffset;
    lastChunkOffset = firstChunkOffset;
    lastChunkLen = -1;

    bytesPerChecksum = this.checksum.getBytesPerChecksum();
    checksumSize = this.checksum.getChecksumSize();
  }

  public static RemoteBlockReader newBlockReader(Socket sock, String file,
      ExtendedBlock block, Token<BlockTokenIdentifier> blockToken,
      long startOffset, long len, int bufferSize) throws IOException {
    return newBlockReader(sock, file, block, blockToken, startOffset,
        len, bufferSize, true, "");
  }

  /**
   * Create a new BlockReader specifically to satisfy a read.
   * This method also sends the OP_READ_BLOCK request.
   * (See the illustrative usage sketch in the comment at the end of
   * this file.)
   *
   * @param sock  An established Socket to the DN. The BlockReader will not close it normally
   * @param file  File location
   * @param block  The block object
   * @param blockToken  The block token for security
   * @param startOffset  The read offset, relative to block head
   * @param len  The number of bytes to read
   * @param bufferSize  The IO buffer size (not the client buffer size)
   * @param verifyChecksum  Whether to verify checksum
   * @param clientName  Client name
   * @return New BlockReader instance
   */
  public static RemoteBlockReader newBlockReader(Socket sock, String file,
      ExtendedBlock block, Token<BlockTokenIdentifier> blockToken,
      long startOffset, long len, int bufferSize,
      boolean verifyChecksum, String clientName) throws IOException {
    // in and out will be closed when sock is closed (by the caller)
    final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
        NetUtils.getOutputStream(sock, HdfsServerConstants.WRITE_TIMEOUT)));
    new Sender(out).readBlock(block, blockToken, clientName, startOffset, len);

    //
    // Get bytes in block, set streams
    //

    DataInputStream in = new DataInputStream(
        new BufferedInputStream(NetUtils.getInputStream(sock), bufferSize));

    BlockOpResponseProto status = BlockOpResponseProto.parseFrom(
        vintPrefixed(in));
    RemoteBlockReader2.checkSuccess(status, sock, block, file);
    ReadOpChecksumInfoProto checksumInfo = status.getReadOpChecksumInfo();
    DataChecksum checksum = DataTransferProtoUtil.fromProto(
        checksumInfo.getChecksum());
    // Warning when we get CHECKSUM_NULL?

    // Read the first chunk offset.
    long firstChunkOffset = checksumInfo.getChunkOffset();

    if (firstChunkOffset < 0 || firstChunkOffset > startOffset ||
        firstChunkOffset >= (startOffset + checksum.getBytesPerChecksum())) {
      throw new IOException("BlockReader: error in first chunk offset (" +
                            firstChunkOffset + ") startOffset is " +
                            startOffset + " for file " + file);
    }

    return new RemoteBlockReader(file, block.getBlockPoolId(),
        block.getBlockId(), in, checksum, verifyChecksum, startOffset,
        firstChunkOffset, len, sock);
  }

  @Override
  public synchronized void close() throws IOException {
    startOffset = -1;
    checksum = null;
    if (dnSock != null) {
      dnSock.close();
    }

    // in will be closed when its Socket is closed.
  }

  @Override
  public void readFully(byte[] buf, int readOffset, int amtToRead)
      throws IOException {
    IOUtils.readFully(this, buf, readOffset, amtToRead);
  }

  @Override
  public int readAll(byte[] buf, int offset, int len) throws IOException {
    // Delegates to the static readFully helper inherited from
    // FSInputChecker, which reads as much as possible instead of
    // failing on a short read.
    return readFully(this, buf, offset, len);
  }

  @Override
  public Socket takeSocket() {
    assert hasSentStatusCode() :
        "BlockReader shouldn't give back sockets mid-read";
    Socket res = dnSock;
    dnSock = null;
    return res;
  }

  @Override
  public boolean hasSentStatusCode() {
    return sentStatusCode;
  }

  /**
   * When the reader reaches the end of the read, it sends a status response
   * (e.g. CHECKSUM_OK) to the DN. Failure to do so could lead to the DN
   * closing our connection (which we will re-open), but won't affect
   * data correctness.
   */
  void sendReadResult(Socket sock, Status statusCode) {
    assert !sentStatusCode : "already sent status code to " + sock;
    try {
      RemoteBlockReader2.writeReadResult(sock, statusCode);
      sentStatusCode = true;
    } catch (IOException e) {
      // It's ok not to be able to send this. But something is probably wrong.
      LOG.info("Could not send read status (" + statusCode +
               ") to datanode " + sock.getInetAddress() + ": " +
               e.getMessage());
    }
  }

  /**
   * File name to print when accessing a block directly (from servlets)
   * @param s Address of the block location
   * @param poolId Block pool ID of the block
   * @param blockId Block ID of the block
   * @return string that has a file name for debug purposes
   */
  public static String getFileName(final InetSocketAddress s,
      final String poolId, final long blockId) {
    return s.toString() + ":" + poolId + ":" + blockId;
  }
}
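/*
 * Illustrative usage sketch: how a caller might drive this reader
 * end-to-end, using only the public API above. The host, port, block,
 * token, and client name are hypothetical placeholders, and error
 * handling is elided.
 *
 *   Socket sock = new Socket();
 *   NetUtils.connect(sock,
 *       new InetSocketAddress("dn.example.com", 50010),
 *       HdfsServerConstants.READ_TIMEOUT);
 *
 *   RemoteBlockReader reader = RemoteBlockReader.newBlockReader(
 *       sock, "/user/alice/data.txt", block, blockToken,
 *       0, block.getNumBytes(), 4096, true, "example-client");
 *
 *   byte[] buf = new byte[4096];
 *   int n;
 *   while ((n = reader.read(buf, 0, buf.length)) > 0) {
 *     // consume n bytes; once EOS is reached, read() sends
 *     // CHECKSUM_OK (or SUCCESS) back to the DN automatically
 *   }
 *   reader.close(); // also closes the socket, since it was not taken back
 */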