/** * Copyright 2011 LiveRamp * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.liveramp.hank.storage.cueball; import java.io.FileInputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.SortedSet; import com.liveramp.commons.util.ByteBufferMemoryUsageEstimator; import com.liveramp.commons.util.BytesUtils; import com.liveramp.hank.compression.cueball.CueballCompressionCodec; import com.liveramp.hank.hasher.Hasher; import com.liveramp.hank.storage.CacheStatistics; import com.liveramp.hank.storage.Reader; import com.liveramp.hank.storage.ReaderResult; import com.liveramp.hank.util.SynchronizedMemoryBoundCache; public class CueballReader implements Reader { private static final KeyHashBufferThreadLocal keyHashBufferThreadLocal = new KeyHashBufferThreadLocal(); private static final ByteBuffer NOT_FOUND_MARKER = ByteBuffer.wrap(new byte[]{}); private final Hasher hasher; private final int valueSize; private final long[] hashIndex; private final FileChannel channel; private final int keyHashSize; private final int fullRecordSize; private final CueballCompressionCodec compressionCodec; private int maxUncompressedBufferSize; private int maxCompressedBufferSize; private final HashPrefixCalculator prefixer; private final int versionNumber; private SynchronizedMemoryBoundCache<ByteBuffer, ByteBuffer> cache; public CueballReader(String partitionRoot, int keyHashSize, Hasher hasher, int valueSize, int hashIndexBits, CueballCompressionCodec compressionCodec, long cacheNumBytesCapacity, int cacheNumItemsCapacity) throws IOException { SortedSet<CueballFilePath> bases = Cueball.getBases(partitionRoot); if (bases == null || bases.size() == 0) { throw new IOException("Could not detect any Cueball base in " + partitionRoot); } CueballFilePath latestBase = bases.last(); this.keyHashSize = keyHashSize; this.hasher = hasher; this.valueSize = valueSize; this.compressionCodec = compressionCodec; this.fullRecordSize = valueSize + keyHashSize; this.prefixer = new HashPrefixCalculator(hashIndexBits); this.versionNumber = latestBase.getVersion(); channel = new FileInputStream(latestBase.getPath()).getChannel(); Footer footer = new Footer(channel, hashIndexBits); hashIndex = footer.getHashIndex(); maxUncompressedBufferSize = footer.getMaxUncompressedBufferSize(); maxCompressedBufferSize = footer.getMaxCompressedBufferSize(); cache = new SynchronizedMemoryBoundCache<ByteBuffer, ByteBuffer>( cacheNumBytesCapacity > 0 || cacheNumItemsCapacity > 0, cacheNumBytesCapacity, cacheNumItemsCapacity, new ByteBufferMemoryUsageEstimator(), new ByteBufferMemoryUsageEstimator()); } @Override public void get(ByteBuffer key, ReaderResult result) throws IOException { // Note: keyHash buffer might be larger than keyHashSize byte[] keyHash = computeKeyHash(key); ByteBuffer keyHashByteBuffer = ByteBuffer.wrap(keyHash); int hashPrefix = prefixer.getHashPrefix(keyHash, 0); long baseOffset = hashIndex[hashPrefix]; // by default, we didn't find what we were looking for result.notFound(); // baseOffset of -1 means that our hashPrefix doesn't map to any blocks if (baseOffset >= 0) { // Attempt to load value from the cache if (loadValueFromCache(keyHashByteBuffer, result)) { return; } // We will read the compressed buffer and decompress it in the same buffer. result.requiresBufferSize(maxCompressedBufferSize + maxUncompressedBufferSize); // set up to read a chunk from the datafile ByteBuffer buffer = result.getBuffer(); buffer.rewind(); buffer.limit(maxCompressedBufferSize); int bytesRead = channel.read(buffer, baseOffset); // decompress from the beginning of the buffer into the unoccupied end of // the buffer final int uncompressedStart = bytesRead; int decompressedLength = compressionCodec.decompress(buffer.array(), 0, bytesRead, buffer.array(), uncompressedStart); // scan the chunk we read to find a matching key, if there is one, // returning the recordfile offset int bufferOffset = getValueOffset(buffer.array(), uncompressedStart, uncompressedStart + decompressedLength, keyHash); // -1 means that we didn't find the key if (bufferOffset > -1) { result.found(); buffer.limit(bufferOffset + valueSize); buffer.position(bufferOffset); addValueToCache(keyHashByteBuffer, buffer); } else { // key not found addNotFoundToCache(keyHashByteBuffer); } } } public Integer getVersionNumber() { return versionNumber; } @Override public CacheStatistics getCacheStatistics() { return new CacheStatistics( cache.size(), cache.getMaxNumItems(), cache.getNumManagedBytes(), cache.getMaxNumManagedBytes()); } @Override public void close() throws IOException { channel.close(); cache = null; } private int getValueOffset(byte[] keyfileBufferChunk, int off, int limit, byte[] key) { for (; off < limit; off += fullRecordSize) { int comparison = BytesUtils.compareBytesUnsigned(keyfileBufferChunk, off, key, 0, keyHashSize); // found match if (comparison == 0) { return off + keyHashSize; } // passed the spot where our key could have been found, so not going to // find it if (comparison == 1) { break; } } // looked everywhere, didn't find it! return -1; } private static class KeyHashBufferThreadLocal extends ThreadLocal<byte[]> { private static int KEY_HASH_BUFFER_INITIAL_SIZE = 8; @Override protected byte[] initialValue() { return new byte[KEY_HASH_BUFFER_INITIAL_SIZE]; } protected byte[] getAndRequireBufferSize(int size) { byte[] buffer = this.get(); if (buffer.length < size) { buffer = new byte[size]; this.set(buffer); } return buffer; } } // Note: result buffer might be larger than keyHashSize private byte[] computeKeyHash(ByteBuffer key) { // Reuse a thread local buffer, but first make sure it is at least of the required size byte[] keyHash = keyHashBufferThreadLocal.getAndRequireBufferSize(keyHashSize); hasher.hash(key, keyHashSize, keyHash); return keyHash; } private void addValueToCache(ByteBuffer keyHash, ByteBuffer value) { cache.put(BytesUtils.byteBufferDeepCopy(keyHash), BytesUtils.byteBufferDeepCopy(value)); } private void addNotFoundToCache(ByteBuffer keyHash) { cache.put(BytesUtils.byteBufferDeepCopy(keyHash), NOT_FOUND_MARKER); } // Return true if managed to read the corresponding value from the cache and into result private boolean loadValueFromCache(ByteBuffer keyHash, ReaderResult result) { ByteBuffer value; value = cache.get(keyHash); if (value != null) { // Compare against the not found marker (note that this is an address equality // and not an object equality on purpose) if (value != NOT_FOUND_MARKER) { // Load cached value into result result.deepCopyIntoResultBuffer(value); result.found(); } else { result.notFound(); } // Was found in cache result.setL1CacheHit(true); return true; } else { // Was not found in cache return false; } } }