/* * Galaxy * Copyright (c) 2012-2014, Parallel Universe Software Co. All rights reserved. * * This program and the accompanying materials are dual-licensed under * either the terms of the Eclipse Public License v1.0 as published by * the Eclipse Foundation * * or (per the licensee's choosing) * * under the terms of the GNU Lesser General Public License version 3.0 * as published by the Free Software Foundation. */ package co.paralleluniverse.galaxy.berkeleydb; import static co.paralleluniverse.common.logging.LoggingUtils.hex; import co.paralleluniverse.common.spring.Component; import co.paralleluniverse.galaxy.server.MainMemoryDB; import co.paralleluniverse.galaxy.server.MainMemoryEntry; import com.google.common.base.Throwables; import com.google.common.primitives.Longs; import com.google.common.primitives.Shorts; import com.sleepycat.bind.tuple.TupleBinding; import com.sleepycat.bind.tuple.TupleInput; import com.sleepycat.bind.tuple.TupleOutput; import com.sleepycat.je.Cursor; import com.sleepycat.je.CursorConfig; import com.sleepycat.je.Database; import com.sleepycat.je.DatabaseConfig; import com.sleepycat.je.DatabaseEntry; import com.sleepycat.je.DatabaseException; import com.sleepycat.je.DatabaseExistsException; import com.sleepycat.je.DatabaseNotFoundException; import com.sleepycat.je.DiskOrderedCursor; import com.sleepycat.je.DiskOrderedCursorConfig; import com.sleepycat.je.Durability; import com.sleepycat.je.Environment; import com.sleepycat.je.EnvironmentConfig; import com.sleepycat.je.LockMode; import com.sleepycat.je.OperationStatus; import com.sleepycat.je.PreloadConfig; import com.sleepycat.je.SecondaryConfig; import com.sleepycat.je.SecondaryCursor; import com.sleepycat.je.SecondaryDatabase; import com.sleepycat.je.SecondaryKeyCreator; import com.sleepycat.je.Transaction; import com.sleepycat.je.TransactionConfig; import it.unimi.dsi.fastutil.longs.LongArrayList; import it.unimi.dsi.fastutil.longs.LongIterator; import java.beans.ConstructorProperties; import java.io.File; import java.io.IOException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.jmx.export.annotation.ManagedAttribute; /** * * @author pron */ public class BerkeleyDB extends Component implements MainMemoryDB { // Note: class must be public for Spring's auto generated javax.management.modelmbean.RequiredModelMBean to expose @ManagedAttribute private static final Logger LOG = LoggerFactory.getLogger(BerkeleyDB.class); private final Environment env; private Database ownerDirectory; private SecondaryDatabase ownerIndex; private Database mainStore; private Database allocationDirectory; private final TupleBinding<MainMemoryEntry> entryBinding; private static final DatabaseEntry SERVER = new DatabaseEntry(Shorts.toByteArray((short) 0)); private final String envHome; private boolean truncate = false; private Durability.SyncPolicy durability = Durability.SyncPolicy.WRITE_NO_SYNC; @ConstructorProperties({"name", "envHome"}) public BerkeleyDB(String name, String envHome) { super(name); this.envHome = envHome; final EnvironmentConfig envConfig = new EnvironmentConfig().setAllowCreate(true).setTransactional(true); envConfig.setDurability(new Durability(durability, Durability.SyncPolicy.SYNC, Durability.ReplicaAckPolicy.SIMPLE_MAJORITY)); final File dir = new File(this.envHome); try { if (!dir.exists()) dir.mkdirs(); } catch (Exception ex) { throw new RuntimeException("cannot mkdir " + envHome, ex); } this.env = new Environment(dir, envConfig); this.entryBinding = new MainMemoryTupleBinding(); } public void setTruncate(boolean truncate) { assertDuringInitialization(); this.truncate = truncate; } @ManagedAttribute public boolean isTruncate() { return truncate; } @ManagedAttribute(currencyTimeLimit = -1, description = "The BDB environment directory") public String getEnvHome() { return envHome; } @Override public void init() throws Exception { super.init(); LOG.info("Opening database, home: {}", env.getHome()); if (truncate) { // make sure the db are exist before truncate is called openOrCreate(); ownerIndex.close(); ownerDirectory.close(); mainStore.close(); allocationDirectory.close(); truncate(); } openOrCreate(); PreloadConfig ownerDirectoryPreloadConfig = new PreloadConfig(); this.ownerDirectory.preload(ownerDirectoryPreloadConfig); if (!truncate) resetOwners(); } private void openOrCreate() throws DatabaseException, IllegalStateException, DatabaseExistsException, DatabaseNotFoundException, IllegalArgumentException { // Open the database. Create it if it does not already exist. this.ownerDirectory = env.openDatabase(null, "ownerDirecotry", new DatabaseConfig().setAllowCreate(true).setTransactional(true)); this.ownerIndex = env.openSecondaryDatabase(null, "ownerIndex", ownerDirectory, ((SecondaryConfig) (new SecondaryConfig().setAllowCreate(true).setSortedDuplicates(true).setTransactional(true))).setAllowPopulate(true).setKeyCreator(new OwnerKeyCreator())); this.mainStore = env.openDatabase(null, "mainStore", new DatabaseConfig().setAllowCreate(true).setTransactional(true)); this.allocationDirectory = env.openDatabase(null, "allocationDirectory", new DatabaseConfig().setAllowCreate(true).setTransactional(true)); } public void truncate() { LOG.info("Truncating database, home: {}", env.getHome()); Transaction txn = env.beginTransaction(null, TransactionConfig.DEFAULT); try { env.truncateDatabase(txn, "ownerDirecotry", false); env.truncateDatabase(txn, "ownerIndex", false); txn.commit(); env.truncateDatabase(null, "mainStore", false); env.truncateDatabase(null, "allocationDirectory", false); } catch (Exception e) { LOG.error("Exception while truncating database. Aborting.", e); txn.abort(); throw Throwables.propagate(e); } } @Override public short casOwner(long id, short oldNode, short newNode) { final DatabaseEntry key = new DatabaseEntry(Longs.toByteArray(id)); final DatabaseEntry value = new DatabaseEntry(); final Transaction txn = env.beginTransaction(null, null); try { OperationStatus status; value.setData(Shorts.toByteArray(newNode)); if (oldNode < 0) { status = ownerDirectory.putNoOverwrite(txn, key, value); if (status == OperationStatus.SUCCESS) { LOG.debug("CAS owner succeeded."); txn.commit(); return newNode; } } status = ownerDirectory.get(txn, key, value, LockMode.RMW); if (status == OperationStatus.SUCCESS) { final short curOldNode = Shorts.fromByteArray(value.getData()); if (LOG.isDebugEnabled()) LOG.debug("CAS owner of {}: current old node: {} wanted old node: {}", new Object[]{hex(id), curOldNode, oldNode}); if (oldNode != curOldNode) { assert curOldNode >= 0; LOG.debug("CAS owner failed."); txn.commit(); return curOldNode; } LOG.debug("CAS owner succeeded."); value.setData(Shorts.toByteArray(newNode)); ownerDirectory.put(txn, key, value); txn.commit(); return newNode; } else if (status == OperationStatus.NOTFOUND) { LOG.debug("CAS owner failed."); txn.commit(); return (short) -1; } LOG.debug("Bad status: {}", status); throw new AssertionError(); } catch (Exception e) { LOG.error("Exception during DB operation. Aborting transaction.", e); txn.abort(); throw Throwables.propagate(e); } } @Override public void removeOwner(short node) { final boolean trace = LOG.isTraceEnabled(); final Transaction txn = null; // final Transaction txn = env.beginTransaction(null, TransactionConfig.DEFAULT); // try { final LongArrayList lines = new LongArrayList(); final DatabaseEntry sKey = new DatabaseEntry(Shorts.toByteArray(node)); final DatabaseEntry pKey = new DatabaseEntry(); final DatabaseEntry data = new DatabaseEntry(); try (SecondaryCursor cursor = ownerIndex.openCursor(txn, null)) { OperationStatus retVal = cursor.getSearchKey(sKey, pKey, data, LockMode.DEFAULT); while (retVal == OperationStatus.SUCCESS) { final long id = Longs.fromByteArray(pKey.getData()); if (trace) LOG.trace("Owner of {}: {} -> 0", id, node); lines.add(id); // cursor.getPrimaryDatabase().put(null, pKey, SERVER); - causes deadlock retVal = cursor.getNextDup(sKey, pKey, data, LockMode.DEFAULT); } } byte[] longArray = new byte[8]; for (LongIterator it = lines.iterator(); it.hasNext();) { toByteArray(it.next(), longArray); pKey.setData(longArray); ownerDirectory.put(null, pKey, SERVER); } // txn.commit(); // } catch (Exception e) { // LOG.error("Exception while removing. Aborting.", e); // txn.abort(); // throw Throwables.propagate(e); // } } public void resetOwners() { final boolean trace = LOG.isTraceEnabled(); final DatabaseEntry key = new DatabaseEntry(); final DatabaseEntry data = new DatabaseEntry(); try (DiskOrderedCursor cursor = ownerDirectory.openCursor(new DiskOrderedCursorConfig().setKeysOnly(true))) { OperationStatus retVal = cursor.getNext(key, data, null); while (retVal == OperationStatus.SUCCESS) { if (trace) LOG.trace("Owner of {} -> 0", Longs.fromByteArray(key.getData())); ownerDirectory.put(null, key, SERVER); retVal = cursor.getNext(key, data, null); } } } @Override public void allocate(short owner, long start, int num) { final DatabaseEntry key = new DatabaseEntry(Longs.toByteArray(start + num - 1)); final DatabaseEntry value = new DatabaseEntry(Shorts.toByteArray(owner)); final Transaction txn = null; // env.beginTransaction(null, null); try { OperationStatus status = allocationDirectory.putNoOverwrite(txn, key, value); if (status != OperationStatus.SUCCESS) { LOG.debug("Bad status: {}", status); throw new AssertionError(); } if (txn != null) txn.commit(); } catch (Exception e) { LOG.error("Exception during DB operation. Aborting transaction.", e); if (txn != null) txn.abort(); throw Throwables.propagate(e); } } @Override public short findAllocation(long id) { final DatabaseEntry key = new DatabaseEntry(Longs.toByteArray(id)); final DatabaseEntry data = new DatabaseEntry(); try (Cursor cursor = allocationDirectory.openCursor(null, CursorConfig.DEFAULT)) { OperationStatus retVal = cursor.getSearchKeyRange(key, data, null); if (retVal == OperationStatus.SUCCESS) { ownerDirectory.put(null, key, SERVER); return Shorts.fromByteArray(data.getData()); } else if (retVal == OperationStatus.NOTFOUND) return (short) -1; throw new AssertionError(); } } @Override public Object beginTransaction() { return env.beginTransaction(null, TransactionConfig.DEFAULT); } @Override public void commit(Object txn) { LOG.debug("commit"); ((Transaction) txn).commit(); } @Override public void abort(Object txn) { LOG.debug("abort"); ((Transaction) txn).abort(); } @Override public void write(long id, short owner, long version, byte[] data, Object txn) { if (LOG.isDebugEnabled()) LOG.debug("WRITE " + hex(id) + " ver: " + version + " data: " + (data != null ? "(" + data.length + " bytes)" : "null")); final DatabaseEntry key = new DatabaseEntry(Longs.toByteArray(id)); final DatabaseEntry dbEntry = new DatabaseEntry(); entryBinding.objectToEntry(new MainMemoryEntry(version, data), dbEntry); mainStore.put((Transaction) txn, key, dbEntry); // try to write owner, but only if nonexistent (i.e will happen at first put only) ownerDirectory.putNoOverwrite((Transaction) txn, key, new DatabaseEntry(Shorts.toByteArray(owner))); } @Override public MainMemoryEntry read(long id) { final DatabaseEntry dbEntry = new DatabaseEntry(); OperationStatus status = mainStore.get(null, new DatabaseEntry(Longs.toByteArray(id)), dbEntry, LockMode.READ_COMMITTED); if (status == OperationStatus.SUCCESS) { final MainMemoryEntry entry = entryBinding.entryToObject(dbEntry); return entry; } else return null; } @Override public void delete(long id, Object txn) { mainStore.delete((Transaction) txn, new DatabaseEntry(Longs.toByteArray(id))); ownerDirectory.delete((Transaction) txn, new DatabaseEntry(Longs.toByteArray(id))); } @Override public long getMaxId() { final long allocationDirectoryMaxId = getMaxId(allocationDirectory); // final long ownerDirecotryMaxId = getMaxId(ownerDirectory); // final long mainStoreMaxId = getMaxId(mainStore); LOG.info("AllocationDirectory max id: {}", allocationDirectoryMaxId); // LOG.info("OwnerDirectory max id: {}", ownerDirecotryMaxId); // LOG.info("MainStore max id: {}", mainStoreMaxId); // // return Math.max(ownerDirecotryMaxId, mainStoreMaxId); return allocationDirectoryMaxId; } private long getMaxId(Database db) { final DatabaseEntry key = new DatabaseEntry(); final DatabaseEntry value = new DatabaseEntry(); try (Cursor cursor = db.openCursor(null, CursorConfig.DEFAULT)) { final OperationStatus status = cursor.getLast(key, value, null); if (status == OperationStatus.SUCCESS) return Longs.fromByteArray(key.getData()); else return 0; } } @Override public void close() { ownerIndex.close(); ownerDirectory.close(); mainStore.close(); allocationDirectory.close(); env.close(); } private static class MainMemoryTupleBinding extends TupleBinding<MainMemoryEntry> { @Override public void objectToEntry(MainMemoryEntry entry, TupleOutput out) { out.writeLong(entry.version); //out.writeUnsignedShort(entry.data.length); out.writeFast(entry.data); } @Override public MainMemoryEntry entryToObject(TupleInput in) { final long version = in.readLong(); //final int dataLength = in.readUnsignedShort(); final int dataLength = in.getBufferLength() - in.getBufferOffset(); final byte[] data = new byte[dataLength]; in.readFast(data); return new MainMemoryEntry(version, data); } } private static class OwnerKeyCreator implements SecondaryKeyCreator { @Override public boolean createSecondaryKey(SecondaryDatabase secondary, DatabaseEntry key, DatabaseEntry data, DatabaseEntry result) { result.setData(data.getData()); return true; } } @Override public void dump(java.io.PrintStream ps) { String home = ""; try { home = env.getHome().getCanonicalPath(); } catch (IOException e) { } ps.println(); ps.println("BERKELEYDB " + home); ps.println("====================================="); ps.println(); printOwners(ps); ps.println(); printMainStore(ps); ps.println(); printOwnerIndex(ps); ps.println(); } public void printOwners(java.io.PrintStream ps) { ps.println("OWNERS"); ps.println("======"); final DatabaseEntry key = new DatabaseEntry(); final DatabaseEntry value = new DatabaseEntry(); try (Cursor cursor = ownerDirectory.openCursor(null, CursorConfig.DEFAULT)) { while (cursor.getNext(key, value, LockMode.DEFAULT) == OperationStatus.SUCCESS) { long id = Longs.fromByteArray(key.getData()); short owner = Shorts.fromByteArray(value.getData()); ps.println("Id : " + hex(id) + " owner: " + owner + ""); } } } public void printMainStore(java.io.PrintStream ps) { ps.println("MAIN STORE"); ps.println("=========="); final DatabaseEntry key = new DatabaseEntry(); final DatabaseEntry value = new DatabaseEntry(); try (Cursor cursor = mainStore.openCursor(null, CursorConfig.DEFAULT)) { while (cursor.getNext(key, value, LockMode.DEFAULT) == OperationStatus.SUCCESS) { long id = Longs.fromByteArray(key.getData()); final MainMemoryEntry entry = entryBinding.entryToObject(value); ps.println("Id : " + hex(id) + " version: " + entry.version + " data: (" + entry.data.length + " bytes)."); } } } public void printOwnerIndex(java.io.PrintStream ps) { ps.println("OWNER INDEX"); ps.println("==========="); final DatabaseEntry sKey = new DatabaseEntry(); final DatabaseEntry pKey = new DatabaseEntry(); final DatabaseEntry value = new DatabaseEntry(); try (SecondaryCursor cursor = ownerIndex.openCursor(null, CursorConfig.DEFAULT)) { while (cursor.getNext(sKey, pKey, value, LockMode.DEFAULT) == OperationStatus.SUCCESS) { long id = Longs.fromByteArray(pKey.getData()); short owner = Shorts.fromByteArray(sKey.getData()); ps.println("Owner: " + owner + " id : " + hex(id)); } } } public static byte[] toByteArray(long value, byte[] array) { array[0] = (byte) (value >> 56); array[1] = (byte) (value >> 48); array[2] = (byte) (value >> 40); array[3] = (byte) (value >> 32); array[4] = (byte) (value >> 24); array[5] = (byte) (value >> 16); array[6] = (byte) (value >> 8); array[7] = (byte) value; return array; } }