/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.segment.memory;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import com.linkedin.pinot.common.segment.ReadMode;
import xerial.larray.buffer.LBuffer;
import xerial.larray.buffer.LBufferAPI;
import xerial.larray.buffer.UnsafeUtil;
import static xerial.larray.buffer.UnsafeUtil.unsafe;
import xerial.larray.mmap.MMapBuffer;
import xerial.larray.mmap.MMapMode;
/**
* In-memory byte buffer for pinot data.
*
* The byte buffer may be memory mapped or off-heap (direct allocation).
* The main advantage of this class over ByteBuffer is to support buffers
* larger than 2GB. This also allows memory-mapping files larger than 2GB.
*
* <b>NOTE:</b> All accesses to this buffer are unchecked. That is, accessing
* an index beyond the size of the buffer is undefined - it may crash or return a garbage
* value.
*
* The 'Index' part of the name is temporary to limit the usage scope
* in order to bake the class first. Use this as an interface only. Its
* implementation *will* change.
*
*/
public class PinotLByteBuffer extends PinotDataBuffer {
  private static final Logger LOGGER = LoggerFactory.getLogger(PinotLByteBuffer.class);

  /**
   * Upper bound on the size of the temporary direct buffer used when streaming a file
   * into this buffer. Arbitrary but somewhat conservative to avoid impacting JVM configurations.
   */
  private static final int FILE_READ_CHUNK_SIZE = 10 * 1024 * 1024;

  private LBufferAPI buffer;
  // Offset of this view inside the underlying buffer; all accessors add it to the caller's index.
  private long startPosition = 0L;
  // Number of bytes visible through this view.
  private long size = 0L;

  /**
   * Fully load the file in to the in-memory buffer
   * @param file file containing index data
   * @param readMode mmap vs heap mode for the buffer
   * @param openMode read or read_write mode for the index
   * @param context context for buffer allocation. Use mainly for resource tracking
   * @return in-memory buffer containing data
   * @throws IOException if the file can not be read or mapped
   */
  public static PinotLByteBuffer fromFile(File file, ReadMode readMode, FileChannel.MapMode openMode, String context)
      throws IOException {
    // Check before dereferencing so that the failure carries a meaningful message.
    Preconditions.checkNotNull(file, "Index file can not be null");
    return fromFile(file, 0, file.length(), readMode, openMode, context);
  }

  /**
   * Loads a portion of file in memory. This will load data from [startPosition, startPosition + length).
   * @param file file to load
   * @param startPosition (inclusive) position in the file to start loading the data from
   * @param length size of the data to load
   * @param readMode mmap vs heap
   * @param openMode read vs read/write
   * @param context context for buffer allocation. Use mainly for resource tracking
   * @return in-memory buffer containing data
   * @throws IOException if the file can not be read or mapped
   */
  public static PinotLByteBuffer fromFile(File file, long startPosition, long length,
      ReadMode readMode, FileChannel.MapMode openMode, String context)
      throws IOException {
    Preconditions.checkNotNull(file, "Index file can not be null");
    if (readMode == ReadMode.heap) {
      return loadFromFile(file, startPosition, length, context);
    } else if (readMode == ReadMode.mmap) {
      return mapFromFile(file, startPosition, length, openMode, context);
    } else {
      throw new RuntimeException("Unknown readmode: " + readMode.name());
    }
  }

  /**
   * Memory-maps the range [start, start + length) of the file and wraps it in an owning buffer.
   */
  static PinotLByteBuffer mapFromFile(File file, long start, long length, FileChannel.MapMode openMode, String context)
      throws IOException {
    MMapBuffer buffer = mapFromFileInternal(file, start, length, openMode, context);
    return new PinotLByteBuffer(buffer, true);
  }

  /**
   * Creates the memory mapping. READ_ONLY is mapped read-only; every other mode is mapped read-write.
   */
  private static MMapBuffer mapFromFileInternal(File file, long start, long length,
      FileChannel.MapMode openMode, String context)
      throws IOException {
    MMapMode mmapMode = (openMode == FileChannel.MapMode.READ_ONLY) ?
        MMapMode.READ_ONLY : MMapMode.READ_WRITE;
    // TODO: add memory tracking MMapUtils
    return new MMapBuffer(file, start, length, mmapMode);
  }

  /**
   * Allocates a buffer of {@code length} bytes and fills it with the file contents from
   * [startPosition, startPosition + length). The allocation is released if the read fails.
   */
  static PinotLByteBuffer loadFromFile(File file, long startPosition, long length, String context)
      throws IOException {
    // TODO: track memory
    LBuffer buf = new LBuffer(length);
    PinotLByteBuffer pinotDataBuffer = new PinotLByteBuffer(buf, true);
    try {
      pinotDataBuffer.readFrom(file, startPosition, length);
    } catch (IOException | RuntimeException e) {
      // Do not leak the allocation when the file can not be read.
      pinotDataBuffer.close();
      throw e;
    }
    return pinotDataBuffer;
  }

  /**
   * Allocates a new buffer of the given size that owns its memory.
   * @param size number of bytes to allocate
   * @return newly allocated owning buffer
   */
  public static PinotLByteBuffer allocateDirect(long size) {
    LBuffer buffer = new LBuffer(size);
    return new PinotLByteBuffer(buffer, true);
  }

  private PinotLByteBuffer(LBufferAPI buffer, boolean isOwner) {
    this(buffer, isOwner, 0, buffer.size());
  }

  private PinotLByteBuffer(LBufferAPI buffer, boolean isOwner, long startPosition, long size) {
    Preconditions.checkNotNull(buffer);
    this.buffer = buffer;
    this.owner = isOwner;
    this.startPosition = startPosition;
    this.size = size;
  }

  /**
   * Transfer the ownership of this buffer. Ownership is transferred only if
   * this buffer is the owner. Otherwise, this method simply acts like a copy.
   * The bounds of this buffer are handed over together with the underlying data so
   * that {@code rhs} addresses the same region as this buffer.
   * NOTE(review): whatever {@code rhs} referenced before the call is not released here.
   * @param rhs buffer to transfer to; must be a PinotLByteBuffer
   */
  public void transferTo(PinotDataBuffer rhs) {
    Preconditions.checkNotNull(rhs);
    Preconditions.checkArgument(rhs instanceof PinotLByteBuffer);
    PinotLByteBuffer rhsBuffer = (PinotLByteBuffer) rhs;
    if (rhs != this) {
      rhsBuffer.buffer = buffer;
      rhsBuffer.startPosition = startPosition;
      rhsBuffer.size = size;
      rhsBuffer.owner = owner;
      this.owner = false;
    }
  }

  /**
   * Duplicate this buffer without transferring ownership.
   * The new buffer will share the underlying data buffer (no data copy) and its bounds.
   * @return newly allocated buffer (does not own data)
   */
  public PinotLByteBuffer duplicate() {
    return new PinotLByteBuffer(this.buffer, false, this.startPosition, this.size);
  }

  /**
   * Releases the data buffer if this is owner. Memory-mapped buffers are flushed first.
   * Accesses after close() are undefined
   */
  @Override
  public void close() {
    if (owner && buffer != null) {
      try {
        if (buffer instanceof MMapBuffer) {
          ((MMapBuffer) buffer).flush();
        }
      } finally {
        // Release even when the flush fails so that the memory is not leaked.
        buffer.release();
        buffer = null;
      }
    }
  }

  /**
   * Returns the byte value at given index
   */
  public byte apply(int index) {
    return buffer.apply(startPosition + index);
  }

  /**
   * Read the byte at index
   * @param index position in this buffer
   * @return byte stored at the position
   */
  public byte getByte(long index) {
    return buffer.getByte(startPosition + index);
  }

  public byte getByte(int index) {
    return buffer.getByte(startPosition + index);
  }

  public void putByte(long index, byte val) {
    buffer.putByte(startPosition + index, val);
  }

  public void putChar(long index, char c) {
    buffer.putChar(startPosition + index, c);
  }

  public char getChar(long index) {
    return buffer.getChar(startPosition + index);
  }

  public void putFloat(long index, float v) {
    buffer.putFloat(startPosition + index, v);
  }

  public void putFloat(int index, float value) {
    buffer.putFloat(startPosition + index, value);
  }

  public void putLong(long index, long l1) {
    buffer.putLong(startPosition + index, l1);
  }

  public long getLong(long index) {
    return buffer.getLong(startPosition + index);
  }

  public void putLong(int index, long value) {
    buffer.putLong(startPosition + index, value);
  }

  public int getInt(int index) {
    return buffer.getInt(startPosition + index);
  }

  public int getInt(long index) {
    return buffer.getInt(startPosition + index);
  }

  public void putInt(int index, int value) {
    buffer.putInt(startPosition + index, value);
  }

  public double getDouble(long l) {
    return buffer.getDouble(startPosition + l);
  }

  public void putDouble(long index, double value) {
    buffer.putDouble(startPosition + index, value);
  }

  public short getShort(int index) {
    return buffer.getShort(startPosition + index);
  }

  public short getShort(long index) {
    return buffer.getShort(startPosition + index);
  }

  public void putShort(int index, short value) {
    buffer.putShort(startPosition + index, value);
  }

  public void putShort(long index, short value) {
    buffer.putShort(startPosition + index, value);
  }

  public void putInt(long index, int value) {
    buffer.putInt(startPosition + index, value);
  }

  public long getLong(int index) {
    return buffer.getLong(startPosition + index);
  }

  public float getFloat(int index) {
    return buffer.getFloat(startPosition + index);
  }

  public float getFloat(long index) {
    return buffer.getFloat(startPosition + index);
  }

  public void putByte(int index, byte value) {
    buffer.putByte(startPosition + index, value);
  }

  public void putDouble(int index, double value) {
    buffer.putDouble(startPosition + index, value);
  }

  public double getDouble(int index) {
    return buffer.getDouble(startPosition + index);
  }

  public char getChar(int index) {
    return buffer.getChar(startPosition + index);
  }

  public void putChar(int index, char value) {
    buffer.putChar(startPosition + index, value);
  }

  /**
   * Sets {@code length} bytes to {@code value}, starting at {@code offset} in this buffer.
   * @param offset position in this buffer to start filling from
   * @param length number of bytes to set
   * @param value byte value to write
   */
  public void fill(long offset, long length, byte value) {
    unsafe.setMemory(address() + offset, length, value);
  }

  /**
   * creates a view on a slice of buffer with range [0, (end-start) ) mapped
   * to [start, end) of this buffer. New buffer will share the same
   * underlying buffer as the original. Any changes will be visible in the original buffer.
   *
   * There is no data copy
   * @param start start position, relative to this buffer
   * @param end end position, relative to this buffer
   * @return non-owning sliced buffer
   */
  public PinotLByteBuffer view(long start, long end) {
    // Positions are relative to this buffer, which may itself be a view with a non-zero start.
    return new PinotLByteBuffer(this.buffer, false, this.startPosition + start, end - start);
  }

  /**
   * Copy contents of this buffer from srcOffset to destArray
   * @param srcOffset position in this buffer to copy from
   * @param destArray destination array to copy data to
   * @param destOffset position in destArray to copy to
   * @param size total size of data to copy
   * @throws ArrayIndexOutOfBoundsException if the data does not fit in destArray
   */
  public void copyTo(long srcOffset, byte[] destArray, int destOffset, int size) {
    int cursor = destOffset;
    for (ByteBuffer bb : toDirectByteBuffers(srcOffset, size)) {
      int bbSize = bb.remaining();
      if ((cursor + bbSize) > destArray.length) {
        throw new ArrayIndexOutOfBoundsException(String.format("cursor + bbSize = %,d", cursor + bbSize));
      }
      bb.get(destArray, cursor, bbSize);
      cursor += bbSize;
    }
  }

  /**
   * Read the given source byte array, then overwrite the buffer contents
   * @param src source data
   * @param destOffset position in this buffer to write to
   * @return number of bytes copied
   */
  public int readFrom(byte[] src, long destOffset) {
    return readFrom(src, 0, destOffset, src.length);
  }

  /**
   * Read the given source byte array, then overwrite the buffer contents
   * @param src source data
   * @param srcOffset position in src to copy from
   * @param destOffset position in this buffer to write to
   * @param length maximum number of bytes to copy
   * @return number of bytes copied
   */
  public int readFrom(byte[] src, int srcOffset, long destOffset, int length) {
    return readFrom(ByteBuffer.wrap(src), srcOffset, destOffset, length);
  }

  /**
   * Copies up to {@code length} bytes from the source buffer in to this buffer. The copy is
   * clamped to the data available in the source (up to its limit) and to the space left in
   * this buffer after {@code destOffset}. The position and limit of {@code sourceBuffer}
   * itself are not modified.
   * @param sourceBuffer buffer to copy from
   * @param srcOffset absolute position in sourceBuffer to copy from
   * @param destOffset position in this buffer to write to
   * @param length maximum number of bytes to copy
   * @return number of bytes copied; 0 if there is nothing to copy
   */
  public int readFrom(ByteBuffer sourceBuffer, int srcOffset, long destOffset, int length) {
    ByteBuffer dupBuffer = sourceBuffer.duplicate();
    int readLen = (int) Math.min(dupBuffer.limit() - srcOffset, Math.min(size() - destOffset, length));
    if (readLen <= 0) {
      return 0;
    }
    ByteBuffer dest = toDirectByteBuffer(destOffset, readLen);
    dupBuffer.position(srcOffset);
    // put() copies limit() - position() bytes, so the limit must reflect the clamped
    // length; otherwise the copy overflows the destination.
    dupBuffer.limit(srcOffset + readLen);
    dest.put(dupBuffer);
    return readLen;
  }

  /**
   * Reads the whole file in to this buffer, starting at position 0 of this buffer.
   * @param dataFile file to read
   * @throws IOException if the file can not be read
   */
  public void readFrom(File dataFile)
      throws IOException {
    readFrom(dataFile, 0, dataFile.length());
  }

  /**
   * Reads [startPosition, startPosition + length) of the file in to this buffer, starting
   * at position 0 of this buffer.
   * @param file file to read
   * @param startPosition position in the file to start reading from
   * @param length number of bytes to read
   * @throws IOException if the file can not be read or ends before the requested range
   * @throws IllegalArgumentException if length exceeds the size of this buffer
   */
  protected void readFrom(File file, long startPosition, long length)
      throws IOException {
    Preconditions.checkArgument(length <= size(),
        "Can not read %s bytes in to buffer of size %s", length, size());
    if (length <= 0) {
      return;
    }
    // The file is only read here, so do not require (or create) a writable file.
    try (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
      FileChannel channel = raf.getChannel();
      // TODO: track memory
      ByteBuffer readBuffer = ByteBuffer.allocateDirect((int) Math.min(FILE_READ_CHUNK_SIZE, length));
      long endPosition = startPosition + length;
      long bufPosition = 0;
      long offset = startPosition;
      while (offset < endPosition) {
        readBuffer.clear();
        long remaining = endPosition - offset;
        if (remaining < readBuffer.capacity()) {
          // Never read past the requested range, even if the file has more data.
          readBuffer.limit((int) remaining);
        }
        int bytesRead = channel.read(readBuffer, offset);
        if (bytesRead < 0) {
          throw new IOException("Unexpected end of file: " + file + " at offset " + offset
              + ", expected data up to " + endPosition);
        }
        this.readFrom(readBuffer, 0, bufPosition, bytesRead);
        bufPosition += bytesRead;
        offset += bytesRead;
      }
    }
  }

  /**
   * @return number of bytes visible through this buffer
   */
  public long size() {
    return size;
  }

  /**
   * @return memory address of the first byte of this buffer (start position included)
   */
  public long address() {
    return buffer.address() + startPosition;
  }

  /**
   * Convert this buffer to a java array.
   * @return copy of the buffer contents
   * @throws IllegalStateException if the buffer is larger than Integer.MAX_VALUE bytes
   */
  public byte[] toArray() {
    if (size() > Integer.MAX_VALUE) {
      throw new IllegalStateException("Cannot create byte array of more than 2GB");
    }
    int len = (int) size();
    ByteBuffer bb = toDirectByteBuffer(0L, len);
    byte[] b = new byte[len];
    // Copy data to the array
    bb.get(b, 0, len);
    return b;
  }

  /**
   * Gives a sequence of ByteBuffers covering this buffer. Writing to these ByteBuffers
   * modifies the contents of this buffer.
   * @return ByteBuffers, each of at most Integer.MAX_VALUE bytes, in native byte order
   */
  public ByteBuffer[] toDirectByteBuffers() {
    return toDirectByteBuffers(0L, size());
  }

  /**
   * Gives a sequence of ByteBuffers covering [startOffset, startOffset + size) of this buffer.
   * Writing to these ByteBuffers modifies the contents of this buffer.
   * @param startOffset position in this buffer where the range starts
   * @param size number of bytes in the range
   * @return ByteBuffers, each of at most Integer.MAX_VALUE bytes, in native byte order
   */
  public ByteBuffer[] toDirectByteBuffers(long startOffset, long size) {
    long blockSize = Integer.MAX_VALUE;
    int numBuffers = (int) ((size + (blockSize - 1)) / blockSize);
    ByteBuffer[] result = new ByteBuffer[numBuffers];
    // address() already accounts for the start position of this buffer.
    long baseAddress = address() + startOffset;
    long covered = 0;
    int index = 0;
    while (covered < size) {
      long blockLength = Math.min(size - covered, blockSize);
      result[index++] =
          UnsafeUtil.newDirectByteBuffer(baseAddress + covered, (int) blockLength).order(ByteOrder.nativeOrder());
      covered += blockLength;
    }
    return result;
  }

  /**
   * Gives a ByteBuffer view of the specified range. Writing to the returned ByteBuffer
   * modifies the contents of this buffer.
   * @param bufferOffset position in this buffer where the range starts
   * @param size number of bytes in the range
   * @return ByteBuffer over the range
   */
  public ByteBuffer toDirectByteBuffer(long bufferOffset, int size) {
    // address() already accounts for the start position of this buffer.
    return UnsafeUtil.newDirectByteBuffer(address() + bufferOffset, size);
  }

  @Override
  protected long start() {
    return startPosition;
  }

  /**
   * Not supported by this buffer.
   * @throws UnsupportedOperationException always
   */
  @Override
  public void order(ByteOrder byteOrder) {
    throw new UnsupportedOperationException();
  }
}