/**
* Copyright 2011 LiveRamp
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.liveramp.hank.storage.curly;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.HashMap;
import java.util.Map;
import java.util.SortedSet;
import com.liveramp.commons.util.ByteBufferMemoryUsageEstimator;
import com.liveramp.commons.util.BytesUtils;
import com.liveramp.hank.compression.CompressionCodec;
import com.liveramp.hank.compression.Decompressor;
import com.liveramp.hank.storage.CacheStatistics;
import com.liveramp.hank.storage.Reader;
import com.liveramp.hank.storage.ReaderResult;
import com.liveramp.hank.util.EncodingHelper;
import com.liveramp.hank.util.SynchronizedMemoryBoundCache;
import com.liveramp.hank.util.UnsafeByteArrayOutputStream;
public class CurlyReader implements Reader, ICurlyReader {
private final Reader keyFileReader;
private final int readBufferSize;
private final FileChannel recordFile;
private final int versionNumber;
private final int bufferReuseMaxSize;
private SynchronizedMemoryBoundCache<ByteBuffer, ByteBuffer> cache;
private final CompressionCodec blockCompressionCodec;
private final int offsetNumBytes;
private final int offsetInBlockNumBytes;
// Last decompressed block cache
private final boolean cacheLastDecompressedBlock;
private ByteBuffer lastDecompressedBlock;
private long lastDecompressedBlockOffset = -1;
private static class Local {
private final Map<CompressionCodec, Decompressor> blockDecompressors;
private UnsafeByteArrayOutputStream decompressionOutputStream;
public Local() {
this.blockDecompressors = new HashMap<CompressionCodec, Decompressor>();
this.decompressionOutputStream = new UnsafeByteArrayOutputStream();
}
public Decompressor getBlockDecompressor(CompressionCodec blockDecompressorCodec) {
Decompressor blockDecompressor = blockDecompressors.get(blockDecompressorCodec);
if (blockDecompressor == null) {
blockDecompressor = blockDecompressorCodec.getFactory().getDecompressor();
blockDecompressors.put(blockDecompressorCodec, blockDecompressor);
}
return blockDecompressor;
}
public UnsafeByteArrayOutputStream getDecompressionOutputStream() {
return decompressionOutputStream;
}
public void clear() {
decompressionOutputStream.reset();
}
public void dropDecompressionOutputStream() {
decompressionOutputStream = new UnsafeByteArrayOutputStream();
}
}
private static final ThreadLocal<Local> threadLocal = new ThreadLocal<Local>() {
@Override
public Local initialValue() {
return new Local();
}
};
public static CurlyFilePath getLatestBase(String partitionRoot) throws IOException {
SortedSet<CurlyFilePath> bases = Curly.getBases(partitionRoot);
if (bases == null || bases.size() == 0) {
throw new IOException("Could not detect any Curly base in " + partitionRoot);
}
return bases.last();
}
public CurlyReader(CurlyFilePath curlyFile,
int recordFileReadBufferBytes,
Reader keyFileReader,
long cacheNumBytesCapacity,
int cacheNumItemsCapacity) throws IOException {
this(curlyFile, recordFileReadBufferBytes, keyFileReader, cacheNumBytesCapacity, cacheNumItemsCapacity, null, -1, -1, false, 0);
}
public CurlyReader(CurlyFilePath curlyFile,
int recordFileReadBufferBytes,
Reader keyFileReader,
long cacheNumBytesCapacity,
int cacheNumItemsCapacity,
CompressionCodec blockCompressionCodec,
int offsetNumBytes,
int offsetInBlockNumBytes,
boolean cacheLastDecompressedBlock,
int bufferReuseMaxSize) throws IOException {
this.recordFile = new FileInputStream(curlyFile.getPath()).getChannel();
this.keyFileReader = keyFileReader;
this.readBufferSize = recordFileReadBufferBytes;
this.versionNumber = curlyFile.getVersion();
this.blockCompressionCodec = blockCompressionCodec;
this.offsetNumBytes = offsetNumBytes;
this.offsetInBlockNumBytes = offsetInBlockNumBytes;
this.cacheLastDecompressedBlock = cacheLastDecompressedBlock;
this.cache = new SynchronizedMemoryBoundCache<ByteBuffer, ByteBuffer>(
cacheNumBytesCapacity > 0 || cacheNumItemsCapacity > 0,
cacheNumBytesCapacity,
cacheNumItemsCapacity,
new ByteBufferMemoryUsageEstimator(),
new ByteBufferMemoryUsageEstimator());
// Check that key file is at the same version
if (keyFileReader != null &&
keyFileReader.getVersionNumber() != null &&
!keyFileReader.getVersionNumber().equals(versionNumber)) {
throw new IOException("Curly Reader version (" + versionNumber
+ ") does not match the provided key file Reader version (" + keyFileReader.getVersionNumber() + ")");
}
if (cacheLastDecompressedBlock) {
lastDecompressedBlock = ByteBuffer.allocate(1);
}
this.bufferReuseMaxSize = bufferReuseMaxSize;
}
@Override
// Note: the buffer in result must be at least readBufferSize long
public void readRecord(ByteBuffer location, ReaderResult result) throws IOException {
// Attempt to load value from the cache
if (loadValueFromCache(location, result)) {
return;
}
// Deep copy the location if caching is active, since result might point to location and overwrite it
ByteBuffer locationDeepCopy = cache.isEnabled() ? BytesUtils.byteBufferDeepCopy(location) : null;
if (blockCompressionCodec == null) {
// When not using block compression, location just contains an offset. Decode it.
long recordFileOffset = EncodingHelper.decodeLittleEndianFixedWidthLong(location);
// Directly read record into result
readRecordAtOffset(recordFileOffset, result);
} else {
// When using block compression, location contains the block's offset and an offset in the block. Decode them.
long recordFileBlockOffset = EncodingHelper.decodeLittleEndianFixedWidthLong(location.array(),
location.arrayOffset() + location.position(), offsetNumBytes);
long offsetInBlock = EncodingHelper.decodeLittleEndianFixedWidthLong(location.array(),
location.arrayOffset() + location.position() + offsetNumBytes, offsetInBlockNumBytes);
ByteBuffer decompressedBlockByteBuffer;
if (cacheLastDecompressedBlock && lastDecompressedBlockOffset == recordFileBlockOffset) {
// This block has been decompressed just before, reuse it
decompressedBlockByteBuffer = lastDecompressedBlock;
} else {
// Read in the compressed block into the result
readRecordAtOffset(recordFileBlockOffset, result);
// Decompress block
decompressedBlockByteBuffer = decompressBlock(result.getBuffer());
// Cache the decompressed block if requested
if (cacheLastDecompressedBlock) {
lastDecompressedBlockOffset = recordFileBlockOffset;
lastDecompressedBlock = BytesUtils.byteBufferDeepCopy(decompressedBlockByteBuffer, lastDecompressedBlock);
}
}
// Position ourselves at the beginning of the actual value
decompressedBlockByteBuffer.position((int)offsetInBlock);
// Determine result value size
int valueSize = EncodingHelper.decodeLittleEndianVarInt(decompressedBlockByteBuffer);
// Copy decompressed result into final result buffer
result.requiresBufferSize(valueSize);
result.getBuffer().clear();
// We can exactly wrap our value
result.getBuffer().put(
decompressedBlockByteBuffer.array(),
decompressedBlockByteBuffer.arrayOffset() + decompressedBlockByteBuffer.position(),
valueSize);
result.getBuffer().flip();
// Drop decompression buffer if needed, it's not used anymore
dropDecompressionBuffer();
}
// Store result in cache if needed
addValueToCache(locationDeepCopy, result.getBuffer());
}
private ByteBuffer decompressBlock(ByteBuffer block) throws IOException {
Local local = threadLocal.get();
local.clear();
local.getBlockDecompressor(blockCompressionCodec).decompressBlock(
block.array(),
block.arrayOffset() + block.position(),
block.remaining(),
local.getDecompressionOutputStream());
return local.getDecompressionOutputStream().getByteBuffer();
}
private void dropDecompressionBuffer() {
Local local = threadLocal.get();
if (local.getDecompressionOutputStream().getByteBuffer() != null &&
local.getDecompressionOutputStream().getByteBuffer().capacity() > bufferReuseMaxSize) {
local.dropDecompressionOutputStream();
}
}
// Note: the buffer in result must be at least readBufferSize long
private void readRecordAtOffset(long recordFileOffset, ReaderResult result) throws IOException {
// Let's reset the buffer so we can do our read.
result.getBuffer().rewind();
// the buffer is already at least this big, so we'll extend it back out.
result.getBuffer().limit(readBufferSize);
// TODO: it does seem like there's a chance that this could return too few
// bytes to do the varint decoding.
recordFile.read(result.getBuffer(), recordFileOffset);
result.getBuffer().rewind();
int recordSize = EncodingHelper.decodeLittleEndianVarInt(result.getBuffer());
// now we know how many bytes to read. do the second read to get the data.
int bytesInRecordSize = result.getBuffer().position();
// we may already have read the entire value in during our first read. we
// can tell this if the remainin() is >= the record size.
if (result.getBuffer().remaining() < recordSize) {
// hm, looks like we didn't read the whole value the first time. bummer.
// the good news is that we *do* know how much to read this time. the
// new size we select is big enough to hold this value and its varint
// size. note that we won't actually be reading the varint part again -
// we only do this size adjustment to help prevent the next under-read
// from requiring a buffer resize.
int newSize = recordSize + EncodingHelper.MAX_VARINT_SIZE;
// resize the buffer
result.requiresBufferSize(newSize);
result.getBuffer().position(0);
result.getBuffer().limit(recordSize);
// read until we've either run out of bytes or gotten the entire record
int bytesRead = 0;
while (bytesRead < recordSize) {
// since we're using the stateless version of read(), we have to keep
// moving the offset pointer ourselves
int bytesReadTemp = recordFile.read(result.getBuffer(), recordFileOffset
+ bytesInRecordSize + bytesRead);
if (bytesReadTemp == -1) {
// hm, actually, i think this is an error case!
break;
}
bytesRead += bytesReadTemp;
}
// we're done reading, so position back to beginning of buffer
result.getBuffer().position(0);
}
// the value should start at buffer.position() and go for recordSize
// bytes, so limit it appropriately.
result.getBuffer().limit(recordSize + result.getBuffer().position());
}
@Override
public void get(ByteBuffer key, ReaderResult result) throws IOException {
// we want at least readBufferSize bytes of available space. we might resize
// again later.
result.requiresBufferSize(readBufferSize);
// ask the keyfile for this key
keyFileReader.get(key, result);
// if the key is found, then we are prepared to do the second lookup.
if (result.isFound()) {
// now we know where to look
readRecord(result.getBuffer(), result);
}
}
@Override
public Integer getVersionNumber() {
return versionNumber;
}
@Override
public CacheStatistics getCacheStatistics() {
CacheStatistics keyReaderCacheStatistics = keyFileReader.getCacheStatistics();
return new CacheStatistics(
keyReaderCacheStatistics.getNumItems() + cache.size(),
keyReaderCacheStatistics.getMaxNumItems() + cache.getMaxNumItems(),
keyReaderCacheStatistics.getNumManagedBytes() + cache.getNumManagedBytes(),
keyReaderCacheStatistics.getMaxNumManagedBytes() + cache.getMaxNumManagedBytes());
}
// Note: location should already be a deep copy that won't get modified
private void addValueToCache(ByteBuffer location, ByteBuffer value) {
cache.put(location, BytesUtils.byteBufferDeepCopy(value));
}
// Return true if managed to read the corresponding value from the cache and into result
private boolean loadValueFromCache(ByteBuffer location, ReaderResult result) {
ByteBuffer value = cache.get(location);
if (value != null) {
result.deepCopyIntoResultBuffer(value);
result.found();
result.setL2CacheHit(true);
return true;
} else {
return false;
}
}
@Override
public void close() throws IOException {
if (recordFile != null) {
recordFile.close();
}
if (keyFileReader != null) {
keyFileReader.close();
}
cache = null;
}
}