/**
* Copyright 2013-2014 Recruit Technologies Co., Ltd. and contributors
* (see CONTRIBUTORS.md)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. A copy of the
* License is distributed with this work in the LICENSE.md file. You may
* also obtain a copy of the License from
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gennai.gungnir.tuple.store;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import org.gennai.gungnir.utils.GungnirUtils;
import org.gennai.gungnir.utils.IntArrayUtils;
import org.gennai.gungnir.utils.KryoSerializer;
import org.rocksdb.ColumnFamilyDescriptor;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.ColumnFamilyOptions;
import org.rocksdb.DBOptions;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.RocksIterator;
import org.rocksdb.WriteBatch;
import org.rocksdb.WriteOptions;
import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.Serializer;
import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.primitives.UnsignedBytes;
/**
 * A persistent, time-ordered key/value table backed by RocksDB.
 *
 * <p>Key layout: the default column family ("store") holds entries keyed by the int triple
 * [hashIndex, timeKey, seqNo], encoded by {@link IntArrayUtils}. Entries written without a hash
 * key use hashIndex -1. A seqNo of -1 marks a counter entry whose value bytes hold the count
 * (see {@link #incr(Object, int)}); the per-hash size accounting treats a counter entry as
 * "count" entries. Three auxiliary column families map serialized hash keys to int indexes
 * ("hash_key"), indexes back to serialized keys ("hash_index"), and indexes to per-hash entry
 * counts ("hash_meta").
 *
 * <p>When {@code expireSecs > 0}, the caller-supplied timeKey is shifted forward by expireSecs
 * on write, and entries whose shifted timeKey is older than "now" are lazily purged during
 * writes, size queries and {@link #compactRange()}.
 *
 * <p>NOTE(review): this class keeps mutable counters ({@code size}, {@code lastHashIndex})
 * without synchronization — presumably single-threaded use; confirm against callers.
 */
public final class RocksDBTable {

  /**
   * One stored record: optional hash key, time key, sequence number and raw value bytes.
   * A negative seqNo identifies a counter entry whose value bytes hold the count.
   */
  public static class Entry {

    private Object hashKey;
    private int timeKey;
    private int seqNo;
    private byte[] value;

    public Entry(Object hashKey, int timeKey, int seqNo, byte[] value) {
      this.hashKey = hashKey;
      this.timeKey = timeKey;
      this.seqNo = seqNo;
      this.value = value;
    }

    public void setHashKey(Object hashKey) {
      this.hashKey = hashKey;
    }

    public Object getHashKey() {
      return hashKey;
    }

    public void setTimeKey(int timeKey) {
      this.timeKey = timeKey;
    }

    public int getTimeKey() {
      return timeKey;
    }

    public void setSeqNo(int seqNo) {
      this.seqNo = seqNo;
    }

    public int getSeqNo() {
      return seqNo;
    }

    public byte[] getValue() {
      return value;
    }
  }

  private int expireSecs;
  private int seekSize;
  private ColumnFamilyDescriptor storeDescriptor;
  private ColumnFamilyDescriptor hashKeyDescriptor;
  private ColumnFamilyDescriptor hashIndexDescriptor;
  private ColumnFamilyDescriptor hashMetaDescriptor;
  private DBOptions dbOptions;
  private RocksDB db;
  private ColumnFamilyHandle storeHandle;
  private ColumnFamilyHandle hashKeyHandle;
  private ColumnFamilyHandle hashIndexHandle;
  private ColumnFamilyHandle hashMetaHandle;
  // Next hash index to assign; recovered from hash_meta on open.
  private int lastHashIndex = 0;
  // Total number of logical entries across all hash keys (counters count as their value).
  private int size;
  // Resume position for compactRange() between invocations.
  private Entry seekEntry;
  private IntArrayUtils i3 = new IntArrayUtils(3);
  private IntArrayUtils i1 = new IntArrayUtils(1);
  private KryoSerializer serializer = new KryoSerializer();
  // hashKey object -> serialized bytes (forward direction).
  private Cache<Object, byte[]> keyBytesCache;
  // Serialized bytes -> hashKey object. Keyed by ByteBuffer because byte[] hashes by
  // identity — a Cache<byte[], Object> would never hit on the fresh arrays returned by
  // RocksIterator.key()/db.get().
  private Cache<ByteBuffer, Object> keyCache;

  /**
   * Rebuilds {@link #lastHashIndex} and {@link #size} from the hash_meta column family,
   * which stores one (index -> count) record per live hash key.
   */
  private void loadMetaData() {
    RocksIterator it = db.newIterator(hashMetaHandle);
    try {
      for (it.seekToFirst(); it.isValid(); it.next()) {
        int hashIndex = i1.get(it.key(), 0);
        if (hashIndex >= lastHashIndex) {
          lastHashIndex = hashIndex + 1;
        }
        size += i1.get(it.value(), 0);
      }
    } finally {
      it.dispose();
    }
  }

  /**
   * Opens (creating if missing) the database with its four column families and registers a
   * Kryo serializer for {@link HashKey} so that composite (List) and single keys round-trip.
   *
   * @param dbPath filesystem path of the RocksDB database
   * @param expireSecs entry lifetime in seconds; 0 disables expiry
   * @param seekSize max entries examined per {@link #compactRange()} call
   * @throws RocksDBException if the database cannot be opened
   */
  private RocksDBTable(String dbPath, int expireSecs, int seekSize) throws RocksDBException {
    this.expireSecs = expireSecs;
    this.seekSize = seekSize;
    storeDescriptor = new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY,
        new ColumnFamilyOptions());
    hashKeyDescriptor = new ColumnFamilyDescriptor("hash_key", new ColumnFamilyOptions());
    hashIndexDescriptor = new ColumnFamilyDescriptor("hash_index", new ColumnFamilyOptions());
    hashMetaDescriptor = new ColumnFamilyDescriptor("hash_meta", new ColumnFamilyOptions());
    List<ColumnFamilyDescriptor> columnFamilyDescriptors = Lists.newArrayListWithCapacity(4);
    columnFamilyDescriptors.add(storeDescriptor);
    columnFamilyDescriptors.add(hashKeyDescriptor);
    columnFamilyDescriptors.add(hashIndexDescriptor);
    columnFamilyDescriptors.add(hashMetaDescriptor);
    dbOptions = new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true);
    List<ColumnFamilyHandle> columnFamilyHandles = Lists.newArrayListWithCapacity(4);
    // Handles come back in descriptor order.
    db = RocksDB.open(dbOptions, dbPath, columnFamilyDescriptors, columnFamilyHandles);
    storeHandle = columnFamilyHandles.get(0);
    hashKeyHandle = columnFamilyHandles.get(1);
    hashIndexHandle = columnFamilyHandles.get(2);
    hashMetaHandle = columnFamilyHandles.get(3);
    loadMetaData();
    // Register on the field-initialized serializer (the original code created a second,
    // redundant KryoSerializer here).
    serializer.register(HashKey.class, new Serializer<HashKey>() {
      @Override
      public void write(Kryo kryo, Output output, HashKey hashKey) {
        if (hashKey.key instanceof List) {
          kryo.writeObject(output, HashKey.Type.COMPOSITE);
          kryo.writeObject(output, hashKey.key);
        } else {
          kryo.writeObject(output, HashKey.Type.SINGLE);
          // Single keys may be of any class, so the class tag must be written too.
          kryo.writeClassAndObject(output, hashKey.key);
        }
      }

      @Override
      public HashKey read(Kryo kryo, Input input, Class<HashKey> type) {
        HashKey.Type keyType = kryo.readObject(input, HashKey.Type.class);
        Object key = null;
        if (keyType == HashKey.Type.COMPOSITE) {
          key = kryo.readObject(input, ArrayList.class);
        } else {
          key = kryo.readClassAndObject(input);
        }
        return new HashKey(key);
      }
    });
  }

  /** Opens a table with expiry and compaction seeking disabled. */
  public static RocksDBTable open(String dbPath) throws RocksDBException {
    return new RocksDBTable(dbPath, 0, 0);
  }

  /** Opens a table with the given expiry window and compactRange batch size. */
  public static RocksDBTable open(String dbPath, int expireSecs, int seekSize)
      throws RocksDBException {
    return new RocksDBTable(dbPath, expireSecs, seekSize);
  }

  /**
   * Adjusts the per-hash count in hash_meta by {@code sz} (may be negative) and the global
   * {@link #size}. When a hash key's count drops to zero, its meta, index and key records
   * are all removed from the batch's target column families.
   */
  private void updateMetaData(int hashIndex, int sz, WriteBatch writeBatch)
      throws RocksDBException {
    if (hashIndex >= 0) {
      int hashSize;
      byte[] indexBytes = i1.create(hashIndex);
      byte[] sizeBytes = db.get(hashMetaHandle, indexBytes);
      if (sizeBytes != null) {
        hashSize = i1.get(sizeBytes, 0);
      } else {
        hashSize = 0;
      }
      hashSize += sz;
      if (hashSize > 0) {
        writeBatch.put(hashMetaHandle, indexBytes, i1.create(hashSize));
      } else {
        // Hash key no longer referenced: drop all bookkeeping for it.
        writeBatch.remove(hashMetaHandle, indexBytes);
        byte[] keyBytes = db.get(hashIndexHandle, indexBytes);
        writeBatch.remove(hashIndexHandle, indexBytes);
        writeBatch.remove(hashKeyHandle, keyBytes);
      }
    }
    size += sz;
  }

  /**
   * Queues removals for all expired entries under {@code hashIndex} (or, for -1, the
   * non-hashed range) and returns the number of logical entries removed.
   */
  private int expire(int hashIndex, WriteBatch writeBatch) {
    int sz = 0;
    if (expireSecs > 0) {
      RocksIterator it = db.newIterator(storeHandle);
      try {
        if (hashIndex >= 0) {
          it.seek(i3.create(hashIndex, 0, 0));
        } else {
          it.seekToFirst();
        }
        int now = GungnirUtils.currentTimeSecs();
        for (; it.isValid(); it.next()) {
          int[] ikey = i3.get(it.key());
          if (ikey[0] == hashIndex && ikey[1] < now) {
            if (ikey[2] >= 0) {
              sz++;
            } else {
              // Counter entry: contributes its count to the logical size.
              sz += i1.get(it.value(), 0);
            }
            writeBatch.remove(storeHandle, it.key());
          } else {
            // Keys are time-ordered within a hash index, so we can stop at the first
            // unexpired (or foreign) key.
            break;
          }
        }
      } finally {
        it.dispose();
      }
    }
    return sz;
  }

  /** Serialization wrapper distinguishing composite (List) keys from single keys. */
  static final class HashKey {

    enum Type {
      SINGLE, COMPOSITE
    }

    private Object key;

    private HashKey(Object key) {
      this.key = key;
    }
  }

  /**
   * Enables bidirectional caching of hash-key serialization to avoid repeated Kryo work.
   *
   * @param cacheMaxSize max entries per direction
   * @param cacheExpireSec access-expiry in seconds
   */
  public void setKeyCache(int cacheMaxSize, int cacheExpireSec) {
    keyBytesCache = CacheBuilder.newBuilder().maximumSize(cacheMaxSize)
        .expireAfterAccess(cacheExpireSec, TimeUnit.SECONDS).build();
    keyCache = CacheBuilder.newBuilder().maximumSize(cacheMaxSize)
        .expireAfterAccess(cacheExpireSec, TimeUnit.SECONDS).build();
  }

  /** Serializes a hash key, consulting the forward cache when enabled. */
  private byte[] serializeHashKey(final Object hashKey) {
    if (keyBytesCache == null) {
      return serializer.serialize(new HashKey(hashKey));
    } else {
      try {
        return keyBytesCache.get(hashKey, new Callable<byte[]>() {
          @Override
          public byte[] call() throws Exception {
            return serializer.serialize(new HashKey(hashKey));
          }
        });
      } catch (ExecutionException e) {
        // Cache loader failed; fall back to a direct serialization.
        return serializer.serialize(new HashKey(hashKey));
      }
    }
  }

  /** Deserializes a hash key, consulting the reverse cache when enabled. */
  private Object deserHashKey(final byte[] keyBytes) {
    if (keyCache == null) {
      return serializer.deserialize(keyBytes, HashKey.class).key;
    } else {
      try {
        // ByteBuffer.wrap gives content-based equals/hashCode; keyBytes is a fresh array
        // from RocksDB and is never mutated afterwards, so it is a safe cache key.
        return keyCache.get(ByteBuffer.wrap(keyBytes), new Callable<Object>() {
          @Override
          public Object call() throws Exception {
            return serializer.deserialize(keyBytes, HashKey.class).key;
          }
        });
      } catch (ExecutionException e) {
        return serializer.deserialize(keyBytes, HashKey.class).key;
      }
    }
  }

  /**
   * Returns the greatest store key &lt;= {@code key} in unsigned lexicographic order, or
   * null if the store holds no such key.
   */
  private byte[] floorKey(byte[] key) {
    byte[] lastKey = null;
    RocksIterator it = db.newIterator(storeHandle);
    try {
      it.seek(key);
      if (it.isValid()) {
        byte[] currentKey = it.key();
        if (UnsignedBytes.lexicographicalComparator().compare(currentKey, key) <= 0) {
          lastKey = currentKey;
        } else {
          // seek() landed on the first key > target; step back once.
          it.prev();
          if (it.isValid()) {
            lastKey = it.key();
          }
        }
      } else {
        // All keys are smaller than the target; the floor is the very last key.
        it.seekToLast();
        if (it.isValid()) {
          lastKey = it.key();
        }
      }
    } finally {
      it.dispose();
    }
    return lastKey;
  }

  /**
   * Stores {@code value} under (hashKey, timeKey). Assigns a fresh hash index for unseen
   * hash keys, picks the next seqNo within the (hashIndex, timeKey) slot, lazily expires
   * old entries for the same hash index and updates size metadata — all in one atomic batch.
   *
   * @param hashKey grouping key, or null for the non-hashed (-1) range
   * @param timeKey caller time key; shifted by expireSecs before storage
   * @param value raw value bytes
   */
  public void put(Object hashKey, int timeKey, byte[] value) throws RocksDBException {
    WriteBatch writeBatch = new WriteBatch();
    Integer hashIndex = null;
    if (hashKey != null) {
      byte[] keyBytes = serializeHashKey(hashKey);
      byte[] indexBytes = db.get(hashKeyHandle, keyBytes);
      if (indexBytes != null) {
        hashIndex = i1.get(indexBytes, 0);
      } else {
        // First sighting of this hash key: allocate an index and record both mappings.
        hashIndex = lastHashIndex;
        indexBytes = i1.create(hashIndex);
        writeBatch.put(hashKeyHandle, keyBytes, indexBytes);
        writeBatch.put(hashIndexHandle, indexBytes, keyBytes);
        lastHashIndex++;
      }
    } else {
      hashIndex = -1;
    }
    timeKey += expireSecs;
    // Probe with seqNo = MAX_VALUE so floorKey finds the highest existing seqNo in the slot.
    byte[] key = i3.create(hashIndex, timeKey, Integer.MAX_VALUE);
    byte[] lastKey = floorKey(key);
    if (lastKey == null) {
      i3.set(key, 2, 0);
    } else {
      int[] ikey = i3.get(lastKey);
      if (ikey[0] == hashIndex && ikey[1] == timeKey) {
        i3.set(key, 2, ikey[2] + 1);
      } else {
        i3.set(key, 2, 0);
      }
    }
    writeBatch.put(key, value);
    int sz = expire(hashIndex, writeBatch);
    // Net size change: +1 for the new entry, minus whatever expired.
    sz = 1 - sz;
    updateMetaData(hashIndex, sz, writeBatch);
    WriteOptions writeOptions = new WriteOptions();
    try {
      db.write(writeOptions, writeBatch);
    } finally {
      writeBatch.dispose();
      writeOptions.dispose();
    }
  }

  /** Stores {@code value} in the non-hashed range. */
  public void put(int timeKey, byte[] value) throws RocksDBException {
    put(null, timeKey, value);
  }

  /**
   * Increments the counter entry at (hashKey, timeKey) — stored with seqNo -1 — creating it
   * at 1 if absent, and returns the new count. Expiry and metadata are handled in the same
   * atomic batch as in {@link #put(Object, int, byte[])}.
   */
  public int incr(Object hashKey, int timeKey) throws RocksDBException {
    WriteBatch writeBatch = new WriteBatch();
    Integer hashIndex = null;
    if (hashKey != null) {
      byte[] keyBytes = serializeHashKey(hashKey);
      byte[] indexBytes = db.get(hashKeyHandle, keyBytes);
      if (indexBytes != null) {
        hashIndex = i1.get(indexBytes, 0);
      } else {
        hashIndex = lastHashIndex;
        indexBytes = i1.create(hashIndex);
        writeBatch.put(hashKeyHandle, keyBytes, indexBytes);
        writeBatch.put(hashIndexHandle, indexBytes, keyBytes);
        lastHashIndex++;
      }
    } else {
      hashIndex = -1;
    }
    timeKey += expireSecs;
    int v = 0;
    // seqNo -1 marks the counter slot for this (hashIndex, timeKey).
    byte[] key = i3.create(hashIndex, timeKey, -1);
    byte[] value = db.get(storeHandle, key);
    if (value == null) {
      v = 1;
    } else {
      v = i1.incr(value, 0);
    }
    writeBatch.put(key, i1.create(v));
    int sz = expire(hashIndex, writeBatch);
    // Net size change: the increment adds one logical entry, minus whatever expired.
    // (The original code called expire() a second time here; that re-scan's result was
    // discarded and only queued duplicate removes, so it has been dropped.)
    sz = 1 - sz;
    updateMetaData(hashIndex, sz, writeBatch);
    WriteOptions writeOptions = new WriteOptions();
    try {
      db.write(writeOptions, writeBatch);
    } finally {
      writeBatch.dispose();
      writeOptions.dispose();
    }
    return v;
  }

  /** Increments the non-hashed counter at {@code timeKey}. */
  public void incr(int timeKey) throws RocksDBException {
    incr(null, timeKey);
  }

  /**
   * Returns the logical entry count for {@code hashKey} (or the global count when null),
   * excluding entries that have expired but not yet been purged.
   */
  public int size(Object hashKey) throws RocksDBException {
    Integer hashIndex = null;
    byte[] indexBytes = null;
    if (hashKey != null) {
      byte[] keyBytes = serializeHashKey(hashKey);
      indexBytes = db.get(hashKeyHandle, keyBytes);
      if (indexBytes != null) {
        hashIndex = i1.get(indexBytes, 0);
      } else {
        return 0;
      }
    } else {
      hashIndex = -1;
    }
    int sz;
    if (hashIndex >= 0) {
      sz = i1.get(db.get(hashMetaHandle, indexBytes), 0);
    } else {
      sz = size;
    }
    if (expireSecs > 0) {
      // Subtract expired-but-unpurged entries so the reported size is accurate.
      RocksIterator it = db.newIterator(storeHandle);
      try {
        it.seek(i3.create(hashIndex, 0, 0));
        int now = GungnirUtils.currentTimeSecs();
        for (; it.isValid(); it.next()) {
          int[] ikey = i3.get(it.key());
          if (ikey[0] == hashIndex && ikey[1] < now) {
            sz--;
          } else {
            break;
          }
        }
      } finally {
        it.dispose();
      }
    }
    return sz;
  }

  /** Returns the global logical entry count. */
  public int size() throws RocksDBException {
    return size(null);
  }

  public boolean isEmpty(Object hashKey) throws RocksDBException {
    return size(hashKey) == 0;
  }

  public boolean isEmpty() throws RocksDBException {
    return size(null) == 0;
  }

  /** Per-hash removal tally accumulated by the iterators until commit(). */
  private static class HashMetaData {
    private int size;
  }

  /**
   * Forward iterator over entries, either scoped to a single hash key or over the whole
   * store. Removals are batched and applied atomically (with metadata updates) on
   * {@link #close()}. Callers must call close() to release the native iterator.
   */
  public final class EntryIterator {

    private Object hashKey;
    private Integer hashIndex;
    private RocksIterator it = db.newIterator(storeHandle);
    private byte[] currentKey;
    private Entry current;
    private WriteBatch writeBatch;
    private Map<Integer, HashMetaData> hashMetaMap;

    private EntryIterator() {
      hashIndex = -1;
      if (expireSecs > 0) {
        // Skip the already-expired prefix of the non-hashed range.
        it.seek(i3.create(-1, GungnirUtils.currentTimeSecs(), 0));
      } else {
        it.seekToFirst();
      }
    }

    private EntryIterator(Object hashKey) throws RocksDBException {
      this.hashKey = hashKey;
      byte[] indexBytes = db.get(hashKeyHandle, serializeHashKey(hashKey));
      if (indexBytes != null) {
        hashIndex = i1.get(indexBytes, 0);
        if (expireSecs > 0) {
          it.seek(i3.create(hashIndex, GungnirUtils.currentTimeSecs(), 0));
        } else {
          it.seek(i3.create(hashIndex, 0, 0));
        }
      }
      // If the hash key is unknown, hashIndex stays null and hasNext() reports false.
    }

    public boolean hasNext() {
      if (hashIndex == null) {
        return false;
      }
      if (!it.isValid()) {
        return false;
      }
      if (hashKey != null) {
        // Scoped iteration ends at the first key belonging to another hash index.
        return hashIndex == i3.get(it.key(), 0);
      }
      return true;
    }

    public Entry next() throws RocksDBException {
      if (hashIndex == null) {
        return null;
      }
      currentKey = it.key();
      int[] ikey = i3.get(currentKey);
      if (hashIndex >= 0) {
        // Resolve the hash index back to the original key object for the Entry.
        byte[] keyBytes = db.get(hashIndexHandle, i1.create(ikey[0]));
        current = new Entry(deserHashKey(keyBytes), ikey[1], ikey[2], it.value());
      } else {
        current = new Entry(null, ikey[1], ikey[2], it.value());
      }
      hashIndex = ikey[0];
      it.next();
      return current;
    }

    /** Queues removal of the entry last returned by {@link #next()}. */
    public void remove() throws RocksDBException {
      if (current != null) {
        if (writeBatch == null) {
          writeBatch = new WriteBatch();
        }
        if (hashMetaMap == null) {
          hashMetaMap = Maps.newHashMap();
        }
        HashMetaData hashMetaData = hashMetaMap.get(hashIndex);
        if (hashMetaData == null) {
          hashMetaData = new HashMetaData();
          hashMetaMap.put(hashIndex, hashMetaData);
        }
        if (current.getSeqNo() >= 0) {
          hashMetaData.size++;
        } else {
          // Counter entry removes "count" logical entries.
          hashMetaData.size += i1.get(current.getValue(), 0);
        }
        writeBatch.remove(storeHandle, currentKey);
      }
    }

    private void commit() throws RocksDBException {
      if (writeBatch != null) {
        for (Map.Entry<Integer, HashMetaData> entry : hashMetaMap.entrySet()) {
          updateMetaData(entry.getKey(), -entry.getValue().size, writeBatch);
        }
        WriteOptions writeOptions = new WriteOptions();
        try {
          db.write(writeOptions, writeBatch);
        } finally {
          writeBatch.dispose();
          writeOptions.dispose();
          writeBatch = null;
          hashMetaMap = null;
        }
      }
    }

    public void close() throws RocksDBException {
      try {
        commit();
      } finally {
        // Always release the native iterator, even if the batched write fails.
        it.dispose();
      }
    }
  }

  public EntryIterator iterator(Object hashKey) throws RocksDBException {
    return new EntryIterator(hashKey);
  }

  public EntryIterator iterator() {
    return new EntryIterator();
  }

  /**
   * Seekable iterator over the whole store with the same batched-removal semantics as
   * {@link EntryIterator}. Callers must call close() to apply removals and release the
   * native iterator.
   */
  public final class SeekIterator {

    private RocksIterator it = db.newIterator(storeHandle);
    private Entry current;
    private Integer hashIndex;
    private byte[] currentKey;
    private WriteBatch writeBatch;
    private Map<Integer, HashMetaData> hashMetaMap;

    private SeekIterator() {
    }

    public void seekToFirst() throws RocksDBException {
      it.seekToFirst();
    }

    /**
     * Positions the iterator at (hashKey, timeKey, seqNo). When {@code inclusive} is
     * false and that exact key exists, positions just past it. An unknown hashKey seeks
     * to the synthetic minimum (-1, -1, -1).
     */
    public void seek(Object hashKey, int timeKey, int seqNo, boolean inclusive)
        throws RocksDBException {
      if (hashKey != null) {
        byte[] indexBytes = db.get(hashKeyHandle, serializeHashKey(hashKey));
        if (indexBytes != null) {
          byte[] key = i3.create(i1.get(indexBytes, 0), timeKey, seqNo);
          it.seek(key);
          if (!inclusive && it.isValid()
              && UnsignedBytes.lexicographicalComparator().compare(it.key(), key) == 0) {
            it.next();
          }
        } else {
          it.seek(i3.create(-1, -1, -1));
        }
      } else {
        byte[] key = i3.create(-1, timeKey, seqNo);
        it.seek(key);
        if (!inclusive && it.isValid()
            && UnsignedBytes.lexicographicalComparator().compare(it.key(), key) == 0) {
          it.next();
        }
      }
    }

    public boolean isValid() {
      return it.isValid();
    }

    public void next() throws RocksDBException {
      it.next();
    }

    /** Materializes the entry at the current position (resolving its hash key if any). */
    public Entry entry() throws RocksDBException {
      currentKey = it.key();
      int[] ikey = i3.get(currentKey);
      if (ikey[0] >= 0) {
        byte[] keyBytes = db.get(hashIndexHandle, i1.create(ikey[0]));
        current = new Entry(deserHashKey(keyBytes), ikey[1], ikey[2], it.value());
      } else {
        current = new Entry(null, ikey[1], ikey[2], it.value());
      }
      hashIndex = ikey[0];
      return current;
    }

    /** Queues removal of the entry last returned by {@link #entry()}. */
    public void remove() throws RocksDBException {
      if (current != null) {
        if (writeBatch == null) {
          writeBatch = new WriteBatch();
        }
        if (hashMetaMap == null) {
          hashMetaMap = Maps.newHashMap();
        }
        HashMetaData hashMetaData = hashMetaMap.get(hashIndex);
        if (hashMetaData == null) {
          hashMetaData = new HashMetaData();
          hashMetaMap.put(hashIndex, hashMetaData);
        }
        if (current.getSeqNo() >= 0) {
          hashMetaData.size++;
        } else {
          hashMetaData.size += i1.get(current.getValue(), 0);
        }
        writeBatch.remove(storeHandle, currentKey);
      }
    }

    private void commit() throws RocksDBException {
      if (writeBatch != null) {
        for (Map.Entry<Integer, HashMetaData> entry : hashMetaMap.entrySet()) {
          updateMetaData(entry.getKey(), -entry.getValue().size, writeBatch);
        }
        WriteOptions writeOptions = new WriteOptions();
        try {
          db.write(writeOptions, writeBatch);
        } finally {
          writeBatch.dispose();
          writeOptions.dispose();
          writeBatch = null;
          hashMetaMap = null;
        }
      }
    }

    public void close() throws RocksDBException {
      try {
        commit();
      } finally {
        // Always release the native iterator, even if the batched write fails.
        it.dispose();
      }
    }
  }

  public SeekIterator seekIterator() {
    return new SeekIterator();
  }

  /**
   * Atomically removes and returns the first live (unexpired) entry for {@code hashKey},
   * also purging any expired entries encountered before it. Returns null when the hash key
   * is unknown or its range holds no live entry.
   */
  public Entry removeFirst(Object hashKey) throws RocksDBException {
    Integer hashIndex = null;
    if (hashKey != null) {
      byte[] keyBytes = serializeHashKey(hashKey);
      byte[] indexBytes = db.get(hashKeyHandle, keyBytes);
      if (indexBytes != null) {
        hashIndex = i1.get(indexBytes, 0);
      } else {
        return null;
      }
    } else {
      hashIndex = -1;
    }
    Entry removedEntry = null;
    RocksIterator it = db.newIterator(storeHandle);
    WriteBatch writeBatch = null;
    try {
      if (hashIndex >= 0) {
        it.seek(i3.create(hashIndex, 0, 0));
      } else {
        it.seekToFirst();
      }
      int now = GungnirUtils.currentTimeSecs();
      int sz = 0;
      for (; it.isValid(); it.next()) {
        int[] ikey = i3.get(it.key());
        if (ikey[0] != hashIndex) {
          break;
        }
        if (writeBatch == null) {
          writeBatch = new WriteBatch();
        }
        if (ikey[2] >= 0) {
          sz++;
        } else {
          sz += i1.get(it.value(), 0);
        }
        if (expireSecs > 0 && ikey[1] < now) {
          // Expired entry: purge it and keep scanning for a live one.
          writeBatch.remove(storeHandle, it.key());
        } else {
          removedEntry = new Entry(hashKey, ikey[1], ikey[2], it.value());
          writeBatch.remove(storeHandle, it.key());
          break;
        }
      }
      if (writeBatch != null) {
        updateMetaData(hashIndex, -sz, writeBatch);
        WriteOptions writeOptions = new WriteOptions();
        try {
          db.write(writeOptions, writeBatch);
        } finally {
          writeBatch.dispose();
          writeOptions.dispose();
        }
      }
    } finally {
      it.dispose();
    }
    return removedEntry;
  }

  /** Atomically removes and returns the first live non-hashed entry. */
  public Entry removeFirst() throws RocksDBException {
    return removeFirst(null);
  }

  /**
   * Incrementally purges expired entries, examining at most {@link #seekSize} positions per
   * call and resuming from where the previous call stopped. When a live entry is found, the
   * scan jumps past the remainder of that hash key's range.
   *
   * @return the entries removed by this pass
   */
  public List<Entry> compactRange() throws RocksDBException {
    SeekIterator it = seekIterator();
    try {
      if (seekEntry == null) {
        it.seekToFirst();
      } else {
        it.seek(seekEntry.getHashKey(), seekEntry.getTimeKey(), seekEntry.getSeqNo(),
            false);
        if (!it.isValid()) {
          // Wrapped past the end; restart from the beginning.
          it.seekToFirst();
        }
      }
      int now = GungnirUtils.currentTimeSecs();
      List<Entry> expiredEntries = Lists.newArrayList();
      for (int i = 0; it.isValid() && i < seekSize; i++) {
        seekEntry = it.entry();
        if (seekEntry.getTimeKey() < now) {
          expiredEntries.add(seekEntry);
          it.remove();
          it.next();
        } else {
          // Live entry: nothing after it in this hash range can be expired, so skip to
          // the next hash key by seeking past (hashKey, MAX_VALUE, MAX_VALUE).
          seekEntry.setTimeKey(Integer.MAX_VALUE);
          seekEntry.setSeqNo(Integer.MAX_VALUE);
          it.seek(seekEntry.getHashKey(), seekEntry.getTimeKey(), seekEntry.getSeqNo(),
              false);
        }
      }
      return expiredEntries;
    } finally {
      // Applies batched removals and releases the native iterator.
      it.close();
    }
  }

  /** Releases all native handles. Column family handles must be disposed before the DB. */
  public void close() throws RocksDBException {
    if (storeHandle != null) {
      storeHandle.dispose();
    }
    if (hashKeyHandle != null) {
      hashKeyHandle.dispose();
    }
    if (hashIndexHandle != null) {
      hashIndexHandle.dispose();
    }
    if (hashMetaHandle != null) {
      hashMetaHandle.dispose();
    }
    if (db != null) {
      db.close();
    }
    if (dbOptions != null) {
      dbOptions.dispose();
    }
  }
}