package com.ctriposs.sdb; import java.io.Closeable; import java.io.File; import java.io.FilenameFilter; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.PriorityQueue; import java.util.concurrent.CountDownLatch; import com.ctriposs.sdb.stats.FileStatsCollector; import com.ctriposs.sdb.stats.Operations; import com.ctriposs.sdb.stats.SDBStats; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.ctriposs.sdb.merge.Level0Merger; import com.ctriposs.sdb.merge.Level1Merger; import com.ctriposs.sdb.table.AbstractMapTable; import com.ctriposs.sdb.table.FCMapTable; import com.ctriposs.sdb.table.GetResult; import com.ctriposs.sdb.table.HashMapTable; import com.ctriposs.sdb.table.MMFMapTable; import com.google.common.base.Preconditions; /** * A Big, Fast, Persistent K/V Store, Tailored for Session Data * * @author bulldog * */ public class SDB implements Closeable { static final Logger log = LoggerFactory.getLogger(SDB.class); public static final int INMEM_LEVEL = -1; public static final int LEVEL0 = 0; public static final int LEVEL1 = 1; public static final int LEVEL2 = 2; public static final int MAX_LEVEL = 2; private volatile HashMapTable[] activeInMemTables; private Object[] activeInMemTableCreationLocks; private List<LevelQueue>[] levelQueueLists; private final SDBStats stats = new SDBStats(); private String dir; private DBConfig config; private Level0Merger[] level0Mergers; private Level1Merger[] level1Mergers; private CountDownLatch[] countDownLatches; private FileStatsCollector fileStatsCollector; private boolean closed = false; public SDB(String dir) { this(dir, new DBConfig()); } @SuppressWarnings("unchecked") public SDB(String dir, DBConfig config) { this.dir = dir; this.config = config; activeInMemTables = new HashMapTable[config.getShardNumber()]; activeInMemTableCreationLocks = new Object[config.getShardNumber()]; for(int i = 0; i < config.getShardNumber(); i++) { activeInMemTableCreationLocks[i] = new Object(); } // initialize level queue list levelQueueLists = new ArrayList[config.getShardNumber()]; for(int i = 0; i < config.getShardNumber(); i++) { levelQueueLists[i] = new ArrayList<LevelQueue>(MAX_LEVEL + 1); for(int j = 0; j <= MAX_LEVEL; j++) { levelQueueLists[i].add(new LevelQueue()); } } try { this.loadMapTables(); } catch (Exception ex) { throw new RuntimeException("Fail to load on disk map tables!", ex); } this.fileStatsCollector = new FileStatsCollector(stats, levelQueueLists); this.fileStatsCollector.start(); this.startLevelMergers(); } private void startLevelMergers() { countDownLatches = new CountDownLatch[this.config.getShardNumber()]; for(int i = 0; i < this.config.getShardNumber(); i++) { countDownLatches[i] = new CountDownLatch(2); } level0Mergers = new Level0Merger[config.getShardNumber()]; level1Mergers = new Level1Merger[config.getShardNumber()]; for(short i = 0; i < this.config.getShardNumber(); i++) { level0Mergers[i] = new Level0Merger(this, this.levelQueueLists[i], countDownLatches[i], i, stats); level0Mergers[i].start(); level1Mergers[i] = new Level1Merger(this, this.levelQueueLists[i], countDownLatches[i], i, stats); level1Mergers[i].start(); } } private void loadMapTables() throws IOException, ClassNotFoundException { File dirFile = new File(dir); if (!dirFile.exists()) { dirFile.mkdirs(); } String fileNames[] = dirFile.list(new FilenameFilter() { @Override public boolean accept(File dir, String filename) { if (filename.endsWith(AbstractMapTable.INDEX_FILE_SUFFIX)) return true; return false; } }); // new DB, setup new active map table if (fileNames == null || fileNames.length == 0) { for(short i = 0; i < this.config.getShardNumber(); i++) { this.activeInMemTables[i] = new HashMapTable(dir, i, LEVEL0, System.nanoTime()); this.activeInMemTables[i].markUsable(true); this.activeInMemTables[i].markImmutable(false); // mutable this.activeInMemTables[i].setCompressionEnabled(this.config.isCompressionEnabled()); } return; } PriorityQueue<AbstractMapTable> pq = new PriorityQueue<AbstractMapTable>(); for(String fileName : fileNames) { int dotIndex = fileName.lastIndexOf("."); if (dotIndex > 0) { fileName = fileName.substring(0, dotIndex); } String[] parts = fileName.split("-"); Preconditions.checkArgument(parts != null && parts.length == 3, "on-disk table file names corrupted!"); int level = Integer.parseInt(parts[1]); if (level == LEVEL0) { pq.add(new HashMapTable(dir, fileName)); } else if (level == LEVEL1) { pq.add(new MMFMapTable(dir, fileName)); } else { pq.add(new FCMapTable(dir, fileName)); } } Preconditions.checkArgument(pq.size() > 0, "on-disk table file names corrupted!"); // setup active map table for(int i = 0; i < this.config.getShardNumber(); i++) { AbstractMapTable table = pq.poll(); Preconditions.checkArgument(table.getLevel() == 0, "on-disk table file names corrupted, no level 0 map tables"); this.activeInMemTables[table.getShard()] = (HashMapTable) table; this.activeInMemTables[table.getShard()].markUsable(true); this.activeInMemTables[table.getShard()].markImmutable(false); // mutable this.activeInMemTables[table.getShard()].setCompressionEnabled(this.config.isCompressionEnabled()); } while(!pq.isEmpty()) { AbstractMapTable table = pq.poll(); if (table.isUsable()) { int level = table.getLevel(); LevelQueue lq = levelQueueLists[table.getShard()].get(level); lq.addLast(table); } else { // garbage table.close(); table.delete(); } } } public String getDir() { return this.dir; } public DBConfig getConfig() { return this.config; } public SDBStats getStats() { return this.stats; } /** * Put key/value entry into the DB with no timeout * * @param key the map entry key * @param value the map entry value */ public void put(byte[] key, byte[] value) { this.put(key, value, AbstractMapTable.NO_TIMEOUT, System.currentTimeMillis(), false); } /** * Put key/value entry into the DB with specific timeToLive * * @param key the map entry key * @param value the map entry value * @param timeToLive time to live */ public void put(byte[] key, byte[] value, long timeToLive) { this.put(key, value, timeToLive, System.currentTimeMillis(), false); } /** * Delete map entry in the DB with specific key * * @param key the map entry key */ public void delete(byte[] key) { this.put(key, new byte[] {0}, AbstractMapTable.NO_TIMEOUT, System.currentTimeMillis(), true); } private short getShard(byte[] key) { int keyHash = Arrays.hashCode(key); keyHash = Math.abs(keyHash); return (short) (keyHash % this.config.getShardNumber()); } private void put(byte[] key, byte[] value, long timeToLive, long createdTime, boolean isDelete) { Preconditions.checkArgument(key != null && key.length > 0, "key is empty"); Preconditions.checkArgument(value != null && value.length > 0, "value is empty"); ensureNotClosed(); long start = System.nanoTime(); String operation = isDelete ? Operations.DELETE : Operations.PUT; try { short shard = this.getShard(key); boolean success = this.activeInMemTables[shard].put(key, value, timeToLive, createdTime, isDelete); if (!success) { // overflow synchronized(activeInMemTableCreationLocks[shard]) { success = this.activeInMemTables[shard].put(key, value, timeToLive, createdTime, isDelete); // other thread may have done the creation work if (!success) { // move to level queue 0 this.activeInMemTables[shard].markImmutable(true); LevelQueue lq0 = this.levelQueueLists[shard].get(LEVEL0); lq0.getWriteLock().lock(); try { lq0.addFirst(this.activeInMemTables[shard]); } finally { lq0.getWriteLock().unlock(); } @SuppressWarnings("resource") HashMapTable tempTable = new HashMapTable(dir, shard, LEVEL0, System.nanoTime()); tempTable.markUsable(true); tempTable.markImmutable(false); //mutable tempTable.put(key, value, timeToLive, createdTime, isDelete); // switch on this.activeInMemTables[shard] = tempTable; } } } } catch(IOException ioe) { stats.recordDBError(operation); if (isDelete) { throw new RuntimeException("Fail to delete key, IOException occurr", ioe); } throw new RuntimeException("Fail to put key & value, IOException occurr", ioe); } finally { stats.recordDBOperation(operation, INMEM_LEVEL, System.nanoTime() - start); } } /** * Get value in the DB with specific key * * @param key map entry key * @return non-null value if the entry exists, not deleted or expired. * null value if the entry does not exist, or exists but deleted or expired. */ public byte[] get(byte[] key) { Preconditions.checkArgument(key != null && key.length > 0, "key is empty"); ensureNotClosed(); long start = System.nanoTime(); int reachedLevel = INMEM_LEVEL; try { short shard = this.getShard(key); // check active hashmap table first GetResult result = this.activeInMemTables[shard].get(key); if (result.isFound()) { if (!result.isDeleted() && !result.isExpired()) { return result.getValue(); } else { return null; // deleted or expired } } else { // check level0 hashmap tables reachedLevel = LEVEL0; LevelQueue lq0 = levelQueueLists[shard].get(LEVEL0); lq0.getReadLock().lock(); try { if (lq0 != null && lq0.size() > 0) { for(AbstractMapTable table : lq0) { result = table.get(key); if (result.isFound()) break; } } } finally { lq0.getReadLock().unlock(); } if (result.isFound()) { if (!result.isDeleted() && !result.isExpired()) { if (result.getLevel() == SDB.LEVEL2 && this.config.isLocalityEnabled()) { // keep locality this.put(key, result.getValue(), result.getTimeToLive(), result.getCreatedTime(), false); } return result.getValue(); } else { return null; // deleted or expired } } // check level 1-2 on disk sorted tables searchLevel12: { for(int level = 1; level <= MAX_LEVEL; level++) { reachedLevel = level; LevelQueue lq = levelQueueLists[shard].get(level); lq.getReadLock().lock(); try { if (lq.size() > 0) { for(AbstractMapTable table : lq) { result = table.get(key); if (result.isFound()) break searchLevel12; } } } finally { lq.getReadLock().unlock(); } } } if (result.isFound()) { if (!result.isDeleted() && !result.isExpired()) { if (result.getLevel() == SDB.LEVEL2 && this.config.isLocalityEnabled()) { // keep locality this.put(key, result.getValue(), result.getTimeToLive(), result.getCreatedTime(), false); } return result.getValue(); } else { return null; // deleted or expired } } } } catch(IOException ioe) { stats.recordDBError(Operations.GET); throw new RuntimeException("Fail to get value by key, IOException occurr", ioe); } finally { stats.recordDBOperation(Operations.GET, reachedLevel, System.nanoTime() - start); } return null; // no luck } @Override public void close() throws IOException { if (closed) return; fileStatsCollector.setStop(); for(int i = 0; i < config.getShardNumber(); i++) { this.activeInMemTables[i].close(); } for(int i = 0; i < config.getShardNumber(); i++) { this.level0Mergers[i].setStop(); this.level1Mergers[i].setStop(); } for(int i = 0; i < config.getShardNumber(); i++) { try { log.info("Shard " + i + " waiting level 0 & 1 merge threads to exit..."); this.countDownLatches[i].await(); } catch (InterruptedException e) { // ignore; } } for(int i = 0; i < config.getShardNumber(); i++) { for(int j = 0; j <= MAX_LEVEL; j++) { LevelQueue lq = this.levelQueueLists[i].get(j); for(AbstractMapTable table : lq) { table.close(); } } } closed = true; log.info("DB Closed."); } /** * Delete all back files; * */ public void destory() { Preconditions.checkArgument(closed, "Can't delete DB in open status, please close first."); for(int i = 0; i < config.getShardNumber(); i++) { this.activeInMemTables[i].delete(); } for(int i = 0; i < config.getShardNumber(); i++) { for(int j = 0; j <= MAX_LEVEL; j++) { LevelQueue lq = this.levelQueueLists[i].get(j); for(AbstractMapTable table : lq) { table.delete(); } } } } protected void ensureNotClosed() { if (closed) { throw new IllegalStateException("You can't work on a closed SDB."); } } }