/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.utils.rocksdb;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.rocksdb.ColumnFamilyDescriptor;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.CompactionStyle;
import org.rocksdb.CompressionType;
import org.rocksdb.DBOptions;
import org.rocksdb.MergeOperator;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.StringAppendOperator;
import java.io.File;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Static helpers for creating and opening RocksDB indexes whose column families are described by a
 * {@link ColumnFamilyEnumeration}.
 */
public class DBUtil {
  private static final Logger LOGGER = LogManager.getFormatterLogger(DBUtil.class);
  // Column family names are always converted to/from bytes with this charset.
  private static final Charset UTF8 = StandardCharsets.UTF_8;
  private static final String DEFAULT_ROCKSDB_COLUMN_FAMILY = "default";

  // The RocksDB.open overload used for creation takes an Options, while the column-family-aware overload used
  // for existing indexes takes a DBOptions, hence the two separate option objects below.
  private static final Options ROCKS_DB_CREATE_OPTIONS = new Options()
      .setCreateIfMissing(true)
      .setDisableDataSync(true)
      .setAllowMmapReads(true) // Trying all sorts of performance tweaking knobs, which are not well documented. :(
      .setAllowMmapWrites(true)
      .setWriteBufferSize(1 << 27) // Warning: setting this higher inflates the index size w/o obvious time benefits.
      .setArenaBlockSize(1 << 20)
      .setCompressionType(CompressionType.SNAPPY_COMPRESSION) // Will hopefully trade CPU for I/O.
      ;

  // Options for opening an index that already exists; createIfMissing is deliberately false.
  public static final DBOptions ROCKS_DB_OPEN_OPTIONS = new DBOptions()
      .setCreateIfMissing(false)
      .setDisableDataSync(true)
      .setAllowMmapReads(true)
      .setAllowMmapWrites(true)
      ;

  /**
   * Create a new rocks DB at a particular location on disk.
   *
   * If creating any column family fails, the freshly opened DB is closed before the failure is propagated.
   *
   * @param pathToIndex A path to the directory where the index will be created.
   * @param columnFamilies Column families to create in the DB.
   * @param <T> A type (probably an enum) that represents a set of column families.
   * @return A DB and map of column family labels (as T) to column family handles.
   * @throws RocksDBException If the DB cannot be opened or a column family cannot be created.
   */
  public static <T extends ColumnFamilyEnumeration<T>> RocksDBAndHandles<T> createNewRocksDB(
      File pathToIndex, T[] columnFamilies) throws RocksDBException {
    Map<T, ColumnFamilyHandle> columnFamilyHandles = new HashMap<>();
    RocksDB db = RocksDB.open(ROCKS_DB_CREATE_OPTIONS, pathToIndex.getAbsolutePath());
    try {
      for (T cf : columnFamilies) {
        LOGGER.info("Creating column family %s", cf.getName());
        ColumnFamilyHandle cfh =
            db.createColumnFamily(new ColumnFamilyDescriptor(cf.getName().getBytes(UTF8)));
        columnFamilyHandles.put(cf, cfh);
      }
    } catch (RocksDBException | RuntimeException e) {
      // The caller never receives the DB on this path, so close it here rather than leak it.
      db.close();
      throw e;
    }

    return new RocksDBAndHandles<T>(db, columnFamilyHandles);
  }

  /**
   * Open an existing RocksDB index.
   *
   * The "default" column family is always opened alongside the requested families, but it is only included in
   * the returned map if {@code T} itself has a member with that name.
   *
   * @param pathToIndex A path to the RocksDB index directory to use.
   * @param columnFamilies A list of column families to open.  Must be exhaustive, non-empty, and non-null.
   * @param <T> A type (probably an enum) that represents a set of column families.
   * @return A DB and map of column family labels (as T) to column family handles.
   * @throws RocksDBException If the DB cannot be opened.
   * @throws RuntimeException If {@code columnFamilies} is null or empty, or if an opened family name cannot be
   *                          mapped back to a member of {@code T}.
   */
  public static <T extends ColumnFamilyEnumeration<T>> RocksDBAndHandles<T> openExistingRocksDB(
      File pathToIndex, T[] columnFamilies) throws RocksDBException {
    if (columnFamilies == null || columnFamilies.length == 0) {
      throw new RuntimeException("Cannot open a RocksDB with an empty list of column families.");
    }

    List<ColumnFamilyDescriptor> columnFamilyDescriptors = new ArrayList<>(columnFamilies.length + 1);
    // Must also open the "default" family or RocksDB will probably choke.
    // Names are encoded as UTF-8 to match both createNewRocksDB and the decoding done further down.
    columnFamilyDescriptors.add(new ColumnFamilyDescriptor(DEFAULT_ROCKSDB_COLUMN_FAMILY.getBytes(UTF8)));
    for (T family : columnFamilies) {
      columnFamilyDescriptors.add(new ColumnFamilyDescriptor(family.getName().getBytes(UTF8)));
    }
    // Filled in by RocksDB.open, in the same order as the descriptors.
    List<ColumnFamilyHandle> columnFamilyHandles = new ArrayList<>(columnFamilyDescriptors.size());

    // ROCKS_DB_OPEN_OPTIONS already has createIfMissing=false; use it as-is rather than mutating shared state.
    RocksDB db = RocksDB.open(ROCKS_DB_OPEN_OPTIONS, pathToIndex.getAbsolutePath(),
        columnFamilyDescriptors, columnFamilyHandles);

    Map<T, ColumnFamilyHandle> columnFamilyHandleMap = new HashMap<>(columnFamilies.length);
    try {
      // TODO: can we zip these together more easily w/ Java 8?
      for (int i = 0; i < columnFamilyDescriptors.size(); i++) {
        ColumnFamilyDescriptor cfd = columnFamilyDescriptors.get(i);
        ColumnFamilyHandle cfh = columnFamilyHandles.get(i);
        String familyName = new String(cfd.columnFamilyName(), UTF8);
        T descriptorFamily = columnFamilies[0].getFamilyByName(familyName); // Use any instance to get the next family.
        if (descriptorFamily == null) {
          if (!DEFAULT_ROCKSDB_COLUMN_FAMILY.equals(familyName)) {
            String msg = String.format("Found unexpected family name '%s' when trying to open RocksDB at %s",
                familyName, pathToIndex.getAbsolutePath());
            LOGGER.error(msg);
            // Crash if we don't recognize the contents of this DB.
            throw new RuntimeException(msg);
          }
          // Just skip this column family if it doesn't map to something we know but is expected.
          continue;
        }
        columnFamilyHandleMap.put(descriptorFamily, cfh);
      }
    } catch (RuntimeException e) {
      // The caller never receives the DB on this path, so close it here rather than leak it.
      db.close();
      throw e;
    }

    return new RocksDBAndHandles<T>(db, columnFamilyHandleMap);
  }
}