/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package tachyon.client;
import java.io.IOException;
import java.io.InputStream;
import java.net.*;
import java.nio.ByteBuffer;
import java.nio.channels.SocketChannel;
import java.util.List;
import com.mellanox.jxio.jxioConnection.JxioConnection;
import org.apache.log4j.Logger;
import tachyon.Constants;
import tachyon.UnderFileSystem;
import tachyon.conf.UserConf;
import tachyon.thrift.ClientBlockInfo;
import tachyon.thrift.NetAddress;
import tachyon.worker.DataServerMessage;
import tachyon.util.CommonUtils;
/**
* BlockInStream for remote block.
*/
public class RemoteBlockInStream extends BlockInStream {
private static final int BUFFER_SIZE = UserConf.get().REMOTE_READ_BUFFER_SIZE_BYTE;
private final Logger LOG = Logger.getLogger(Constants.LOGGER_TYPE);
private ClientBlockInfo mBlockInfo;
private InputStream mCheckpointInputStream = null;
private long mReadByte;
private ByteBuffer mCurrentBuffer = null;
private long mBufferStartPosition = 0;
private boolean mRecache = true;
private BlockOutStream mBlockOutStream = null;
private Object mUFSConf = null;
/**
* @param file
* the file the block belongs to
* @param readType
* the InStream's read type
* @param blockIndex
* the index of the block in the file
* @throws IOException
*/
RemoteBlockInStream(TachyonFile file, ReadType readType, int blockIndex) throws IOException {
this(file, readType, blockIndex, null);
}
/**
* @param file
* the file the block belongs to
* @param readType
* the InStream's read type
* @param blockIndex
* the index of the block in the file
* @param ufsConf
* the under file system configuration
* @throws IOException
*/
RemoteBlockInStream(TachyonFile file, ReadType readType, int blockIndex, Object ufsConf)
throws IOException {
super(file, readType, blockIndex);
mBlockInfo = TFS.getClientBlockInfo(FILE.FID, BLOCK_INDEX);
mReadByte = 0;
mBufferStartPosition = 0;
if (!FILE.isComplete()) {
throw new IOException("File " + FILE.getPath() + " is not ready to read");
}
mRecache = readType.isCache();
if (mRecache) {
mBlockOutStream = new BlockOutStream(file, WriteType.TRY_CACHE, blockIndex);
}
updateCurrentBuffer();
mUFSConf = ufsConf;
if (mCurrentBuffer == null) {
setupStreamFromUnderFs(mBlockInfo.offset, mUFSConf);
if (mCheckpointInputStream == null) {
TFS.reportLostFile(FILE.FID);
throw new IOException("Can not find the block " + FILE + " " + BLOCK_INDEX);
}
}
}
@Override
public void close() throws IOException {
if (!mClosed) {
if (mRecache) {
mBlockOutStream.cancel();
}
if (mCheckpointInputStream != null) {
mCheckpointInputStream.close();
}
}
mClosed = true;
}
private void doneRecache() throws IOException {
if (mRecache) {
mBlockOutStream.close();
}
}
@Override
public int read() throws IOException {
mReadByte ++;
if (mReadByte > mBlockInfo.length) {
doneRecache();
return -1;
}
if (mCurrentBuffer != null) {
if (mCurrentBuffer.remaining() == 0) {
mBufferStartPosition = mReadByte - 1;
updateCurrentBuffer();
}
if (mCurrentBuffer != null) {
int ret = mCurrentBuffer.get() & 0xFF;
if (mRecache) {
mBlockOutStream.write(ret);
}
return ret;
}
setupStreamFromUnderFs(mBlockInfo.offset + mReadByte - 1, mUFSConf);
}
int ret = mCheckpointInputStream.read() & 0xFF;
if (mRecache) {
mBlockOutStream.write(ret);
}
return ret;
}
@Override
public int read(byte b[]) throws IOException {
return read(b, 0, b.length);
}
@Override
public int read(byte b[], int off, int len) throws IOException {
if (b == null) {
throw new NullPointerException();
} else if (off < 0 || len < 0 || len > b.length - off) {
throw new IndexOutOfBoundsException();
} else if (len == 0) {
return 0;
}
long ret = mBlockInfo.length - mReadByte;
if (ret < len) {
len = (int) ret;
}
if (ret == 0) {
return -1;
}
if (mCurrentBuffer != null) {
if (mCurrentBuffer.remaining() == 0) {
mBufferStartPosition = mReadByte;
updateCurrentBuffer();
}
if (mCurrentBuffer != null) {
ret = Math.min(ret, mCurrentBuffer.remaining());
ret = Math.min(ret, len);
mCurrentBuffer.get(b, off, (int) ret);
mReadByte += ret;
if (mRecache) {
mBlockOutStream.write(b, off, (int) ret);
if (mReadByte == mBlockInfo.length) {
doneRecache();
}
}
return (int) ret;
}
setupStreamFromUnderFs(mBlockInfo.offset + mReadByte, mUFSConf);
}
ret = mCheckpointInputStream.read(b, off, len);
mReadByte += ret;
if (mRecache) {
mBlockOutStream.write(b, off, (int) ret);
if (mReadByte == mBlockInfo.length) {
doneRecache();
}
}
return (int) ret;
}
private ByteBuffer readRemoteByteBuffer(ClientBlockInfo blockInfo, long offset, long len) {
ByteBuffer buf = null;
try {
List<NetAddress> blockLocations = blockInfo.getLocations();
LOG.info("Block locations:" + blockLocations);
for (int k = 0; k < blockLocations.size(); k ++) {
String host = blockLocations.get(k).mHost;
int port = blockLocations.get(k).mPort;
// The data is not in remote machine's memory if port == -1.
if (port == -1) {
continue;
}
if (host.equals(InetAddress.getLocalHost().getHostName())
|| host.equals(InetAddress.getLocalHost().getHostAddress())) {
String localFileName = CommonUtils.concat(TFS.getRootFolder(), blockInfo.blockId);
LOG.warn("Master thinks the local machine has data " + localFileName + "! But not!");
}
LOG.info(host + ":" + (port + 1) + " current host is "
+ InetAddress.getLocalHost().getHostName() + " "
+ InetAddress.getLocalHost().getHostAddress());
try {
buf =
retrieveByteBufferFromRemoteMachine(new InetSocketAddress(host, port + 1),
blockInfo.blockId, offset, len);
if (buf != null) {
break;
}
} catch (IOException e) {
LOG.error(e.getMessage());
buf = null;
}
}
} catch (IOException e) {
LOG.error("Failed to get read data from remote " + e.getMessage());
buf = null;
}
return buf;
}
private ByteBuffer retrieveByteBufferFromRemoteMachine(InetSocketAddress address, long blockId,
long offset, long length) throws IOException {
DataServerMessage recvMsg = DataServerMessage.createBlockResponseMessage(false, blockId);
String uri = String.format("rdma://%s:%d/blockId=%d", address.getHostName(), address.getPort(),blockId);
try {
JxioConnection connect = new JxioConnection(new URI(uri));
connect.setRcvSize(655360);
InputStream input = connect.getInputStream();
recvMsg.recv(input);
connect.disconnect();
} catch (URISyntaxException e) {
throw new IOException("Could not init rdma init");
} catch (ConnectException e) {
throw new IOException("Could not connect to rdma");
}
if (!recvMsg.isMessageReady()) {
LOG.info("Data " + blockId + " from remote machine is not ready.");
return null;
}
if (recvMsg.getBlockId() < 0) {
LOG.info("Data " + recvMsg.getBlockId() + " is not in remote machine.");
return null;
}
return recvMsg.getReadOnlyData();
}
@Override
public void seek(long pos) throws IOException {
if (pos < 0) {
throw new IOException("pos is negative: " + pos);
}
mRecache = false;
if (mCurrentBuffer != null) {
mReadByte = pos;
if (mBufferStartPosition <= pos && pos < mBufferStartPosition + mCurrentBuffer.limit()) {
mCurrentBuffer.position((int) (pos - mBufferStartPosition));
} else {
mBufferStartPosition = pos;
updateCurrentBuffer();
}
} else {
if (mCheckpointInputStream != null) {
mCheckpointInputStream.close();
}
setupStreamFromUnderFs(mBlockInfo.offset + pos, mUFSConf);
}
}
private void setupStreamFromUnderFs(long offset, Object conf) throws IOException {
String checkpointPath = TFS.getUfsPath(FILE.FID);
if (!checkpointPath.equals("")) {
LOG.info("May stream from underlayer fs: " + checkpointPath);
UnderFileSystem underfsClient = UnderFileSystem.get(checkpointPath, conf);
try {
mCheckpointInputStream = underfsClient.open(checkpointPath);
while (offset > 0) {
long skipped = mCheckpointInputStream.skip(offset);
offset -= skipped;
if (skipped == 0) {
throw new IOException("Failed to find the start position " + offset + " for block "
+ mBlockInfo);
}
}
} catch (IOException e) {
LOG.error("Failed to read from checkpoint " + checkpointPath + " for File " + FILE.FID
+ "\n" + e);
mCheckpointInputStream = null;
}
}
}
@Override
public long skip(long n) throws IOException {
if (n <= 0) {
return 0;
}
long ret = mBlockInfo.length - mReadByte;
if (ret > n) {
ret = n;
}
if (mCurrentBuffer != null) {
if (mCurrentBuffer.remaining() < ret) {
mBufferStartPosition = mReadByte + ret;
updateCurrentBuffer();
}
if (mCurrentBuffer != null) {
if (ret > 0) {
if (mRecache) {
mBlockOutStream.cancel();
}
mRecache = false;
}
return (int) ret;
}
setupStreamFromUnderFs(mBlockInfo.offset + mReadByte, mUFSConf);
}
long tmp = mCheckpointInputStream.skip(ret);
ret = Math.min(ret, tmp);
mReadByte += ret;
if (ret > 0) {
if (mRecache) {
mBlockOutStream.cancel();
}
mRecache = false;
}
return ret;
}
private void updateCurrentBuffer() throws IOException {
long length = BUFFER_SIZE;
if (mBufferStartPosition + length > mBlockInfo.length) {
length = mBlockInfo.length - mBufferStartPosition;
}
LOG.info(String.format("Try to find remote worker and read block %d from %d, with len %d",
mBlockInfo.blockId, mBufferStartPosition, length));
mCurrentBuffer = readRemoteByteBuffer(mBlockInfo, mBufferStartPosition, length);
if (mCurrentBuffer == null) {
mBlockInfo = TFS.getClientBlockInfo(FILE.FID, BLOCK_INDEX);
mCurrentBuffer = readRemoteByteBuffer(mBlockInfo, mBufferStartPosition, length);
}
}
}