/**
 * Copyright 2013 Oak Ridge National Laboratory
 * Author: James Horey <horeyjl@ornl.gov>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 **/
package gov.ornl.keva.node;

/**
 * Java libs.
 **/
import java.util.Map;
import java.util.HashMap;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Comparator;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * SEDA libs.
 **/
import gov.ornl.seda.SEDAFuture;

/**
 * Configuration libs.
 **/
import gov.ornl.config.ConfigFactory;
import gov.ornl.config.Configuration;
import gov.ornl.config.ConfigEntry;

/**
 * Keva libs.
 **/
import gov.ornl.keva.sstable.SSTable;
import gov.ornl.keva.mem.MemTable;
import gov.ornl.keva.mem.MemTableAllocator;
import gov.ornl.keva.table.TableValueFactory;
import gov.ornl.keva.table.TableKey;
import gov.ornl.keva.table.TableValue;
import gov.ornl.keva.core.KevaDBException;
import gov.ornl.keva.core.VectorClock;
import gov.ornl.keva.core.PruneOptions;
import gov.ornl.keva.core.ReadOptions;
import gov.ornl.keva.core.WriteOptions;
import gov.ornl.keva.core.OpenOptions;
import gov.ornl.keva.core.StreamIterator;
import gov.ornl.keva.core.TreeUnionIterator;
import gov.ornl.keva.loader.JobLoader;

/**
 * KevaDB is the primary mechanism to interact with databases.
 * Each database is represented and mediated by a KevaDB instance, and
 * has a simple API to interact with the underlying data. The data model
 * used by KevaDB is based on a history of values defined by vector clocks.
 * This means that values may persist for a long time (even if users write to
 * the same key), and that values may fork (depending on the vector clock
 * assigned to the value).
 *
 * @author James Horey
 */
public class KevaDB {
    /**
     * Identify this database.
     **/
    private String db;

    /**
     * Client ID clock to use for system operations (like delete).
     **/
    private volatile int systemClock = 0;

    /**
     * The pruning options are used to filter results
     * while reading. They also serve as a way to remove
     * unnecessary items from the sstables.
     **/
    private PruneOptions pruneOptions;

    /**
     * Used to sort independent values.
     **/
    private Comparator<TableValue> comparator;

    /**
     * Memtable is where all the data actually resides.
     **/
    private MemTable table;
    private MemTableAllocator memAllocator;
    private long memTableFlushSize;

    /**
     * Durable storage.
     **/
    private SSTableService diskService;
    private WriteAheadLog wal;

    /**
     * Configuration information.
     **/
    private String dataPath;
    private String logPath;
    private String configFile;
    private ConfigFactory configFactory;

    /**
     * This is not a public constructor. To instantiate
     * KevaDB objects, use the factory.
     **/
    protected KevaDB(String db, String configFile) {
        this.db = db;

        // These are where the configuration files live.
        dataPath = null;
        logPath = null;

        // Set up default pruning options.
        // This is overridden by the config options.
        pruneOptions = new PruneOptions();

        // Default comparator sorts values by wall time.
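        // (Independent values within a branch are kept in the order defined by
        // this comparator; a custom Comparator<TableValue> may be supplied via
        // the "keva.sort" configuration entry, which is handled in setStorage().)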
        comparator = new Comparator<TableValue>() {
            public int compare(TableValue v1, TableValue v2) {
                // Compare the wall-clock times without risking integer overflow.
                return Long.compare(v1.getClock().getLocalTime(),
                                    v2.getClock().getLocalTime());
            }
        };

        // Set up the SSTable.
        diskService = SSTableService.newInstance();

        this.configFile = configFile;
        configFactory = new ConfigFactory();
        loadConfig(configFile); // Load configuration.

        // Set up the MemTable.
        memAllocator = new MemTableAllocator();
        table = memAllocator.newMemTable(memTableFlushSize, comparator);
        table.setPruneOptions(pruneOptions);
    }

    /**
     * Start the database. Used by the factory to finish
     * initialization of the DB.
     */
    protected void start() {
        // Configure the write-ahead log.
        wal = new WriteAheadLog(this, configFile);
        wal.createLog();
    }

    /**
     * Load up all the configuration files.
     **/
    private void loadConfig(String c) {
        Configuration conf;

        if(c != null) {
            Path p = Paths.get(c);
            conf = configFactory.getConfig(p.toAbsolutePath().toString());

            if(conf != null) {
                setStorage(conf); // Set the storage directories.
            }
        }
    }

    /**
     * Set the various storage directories.
     **/
    @SuppressWarnings("unchecked")
    private void setStorage(Configuration conf) {
        ConfigEntry entry = null;

        entry = conf.get("keva.data.dir");
        if(entry != null) {
            // Try to use the values set.
            List<String> vv = entry.getEntry("value").getValues();
            if(vv.size() > 0) {
                dataPath = vv.get(0).trim() +
                    System.getProperty("file.separator") + db;
            }
        }

        // Set the WAL directory.
        entry = conf.get("keva.wal.dir");
        if(entry != null) {
            List<String> vv = entry.getEntry("value").getValues();
            if(vv.size() > 0) {
                logPath = vv.get(0).trim() +
                    System.getProperty("file.separator") + db;
            }
        }

        // The sstable implementation.
        entry = conf.get("keva.sstable.impl");
        if(entry != null) {
            List<String> vv = entry.getEntry("value").getValues();
            for(String v : vv) {
                String[] s = v.split(":");

                if(s.length == 1) {
                    diskService.addDB(this, s[0], null);
                }
                else {
                    diskService.addDB(this, s[0], s[1]);
                }
            }
        }

        // Set up the sorting.
        entry = conf.get("keva.sort");
        if(entry != null) {
            List<String> vv = entry.getEntry("value").getValues();

            if(vv.size() == 1) {
                // Just the name of the class. Assume that we can find
                // the class using the system loader.
                String clazz = vv.get(0).trim();

                Object obj = JobLoader.load(clazz, null, null);
                if(obj != null && obj instanceof Comparator) {
                    comparator = (Comparator<TableValue>)obj;
                }
            }
            else if(vv.size() == 2) {
                // Name of the comparator jar & class.
                String jar = vv.get(0).trim();
                String clazz = vv.get(1).trim();

                Path p = Paths.get(jar);
                Object obj = JobLoader.load(clazz, p.toAbsolutePath().toString(), null);
                if(obj != null && obj instanceof Comparator) {
                    comparator = (Comparator<TableValue>)obj;
                }
            }
        }

        // Should we prune the deleted items from the history?
        entry = conf.get("keva.prune.delete");
        if(entry != null) {
            List<String> vv = entry.getEntry("value").getValues();
            if(vv.size() > 0) {
                pruneOptions.delete = Boolean.parseBoolean(vv.get(0).trim());
            }
        }

        // Should we prune older values?
        entry = conf.get("keva.prune.history");
        if(entry != null) {
            List<String> vv = entry.getEntry("value").getValues();
            if(vv.size() > 0) {
                pruneOptions.newest = Integer.parseInt(vv.get(0).trim());
            }
        }

        // How large the memtable can grow before being flushed.
        memTableFlushSize = MemTable.RECOMMENDED_THRESHOLD;
        entry = conf.get("keva.memtable.threshold");
        if(entry != null) {
            List<String> vv = entry.getEntry("value").getValues();
            if(vv.size() > 0) {
                memTableFlushSize = Integer.parseInt(vv.get(0).trim());
            }
        }
    }

    /**
     * Set the sstable implementation class.
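     * <p>
     * For example (the class and jar names below are purely illustrative):
     * <pre>{@code
     * db.setSSTableImplementation("com.example.MySSTable", "/path/to/my-sstable.jar");
     * }</pre>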
     *
     * @param className Name of the sstable class
     * @param jar Name of the jar file containing the implementation (optional)
     */
    public void setSSTableImplementation(String className, String jar) {
        diskService.addDB(this, className, jar);
    }

    /**
     * ID that uniquely identifies this database.
     *
     * @return The string representation of the ID.
     */
    public String getID() {
        return db;
    }

    /**
     * Define the path where the sstables are stored. This value is also
     * defined by the configuration parameter "keva.data.dir".
     *
     * @param path The path where the data is stored.
     */
    public void setDataPath(String path) {
        dataPath = path;
    }

    /**
     * Return the path where the sstables are stored.
     *
     * @return The path where the data is stored.
     */
    public String getDataPath() {
        return dataPath;
    }

    /**
     * Define the log directory. The log directory is where all the WAL logs live.
     *
     * @param path The log directory path.
     */
    public void setLogPath(String path) {
        logPath = path;
    }

    /**
     * Return the log directory path.
     *
     * @return The log path in string representation.
     */
    public String getLogPath() {
        return logPath;
    }

    /**
     * Get the SSTable service.
     **/
    protected SSTableService getDiskService() {
        return diskService;
    }

    /**
     * Format the database by clearing the durable sstables and the WAL.
     **/
    protected void format() {
        diskService.format(this);

        if(wal != null) {
            wal.clear();
        }
    }

    /**
     * Close a database.
     */
    public void close() throws KevaDBException {
        // Set the memtable to null so that nothing
        // else can write to it.
        MemTable oldTable = table;
        table = null;

        // We need to wait for any existing writes to finish.
        // Easiest way to do this is just try locking each key.
        for(Iterator<TableKey> keys = oldTable.getKeys(); keys.hasNext(); ) {
            TableKey key = keys.next();

            oldTable.lock(key);
            oldTable.unlock(key);
        }

        // Flush the table to disk.
        if(oldTable.getNumKeys() > 0) {
            diskService.flush(this, oldTable);
        }

        // Probably a bug, since we don't know when the table is actually free!
        memAllocator.freeMemTable(oldTable);

        // Now get rid of old entries in the WAL, including
        // any in-memory buffers.
        wal.clear();
    }

    /**
     * Recover the database from logs.
     */
    public void recover() throws KevaDBException {
        // Disable the current WAL so that we don't
        // record the playback.
        WriteAheadLog temp = wal;
        wal = null;

        // Replay the old WAL.
        WriteAheadLog oldLog = new WriteAheadLog(this, configFile);
        oldLog.replay();

        // Now set up our wal again.
        wal = temp;
    }

    /**
     * Force all the sstables in the first level to be merged.
     */
    public void forceMerge() {
        SEDAFuture future = diskService.forceMerge(this);
        future.get();
    }

    /**
     * Flush the current memtable to disk and make an sstable.
     * This normally happens when the memtable grows too large, but
     * the user can force the issue if necessary.
     */
    public void flush() {
        MemTable oldTable = table;

        // Do not flush an empty table.
        if(oldTable.getNumKeys() > 0) {
            // Replace with a new memtable and carry over the
            // pruning options (as in the constructor).
            table = memAllocator.newMemTable(memTableFlushSize, comparator);
            table.setPruneOptions(pruneOptions);

            // Wait for all the writers to be complete
            // on the old table.
            oldTable.flush();

            // Flush the table to disk.
            SEDAFuture future = diskService.flush(this, oldTable);

            // Wait for the job to complete, before freeing
            // the old memtable.
            future.get();
            memAllocator.freeMemTable(oldTable);

            // Now get rid of old entries in the WAL.
            if(wal != null) {
                diskService.lockForRead();
                wal.recycle(System.currentTimeMillis());
                diskService.unlockForRead();
            }

            // Finally check if we need to merge any of the
            // tables in any level.
            diskService.mergeIfNecessary(this);
        }
    }

    /**
     * Commit a tentative value to memory.
     * A tentative value is a value that is already in the memtable, but is
     * not visible. Since the value isn't visible it won't be flushed
     * to an sstable until the value is committed. This is useful
     * when we need to atomically commit multiple values.
     *
     * @param key The key of the value to commit.
     * @param value The value to commit. We actually just need the vector clock.
     * @param options Write options associated with this value.
     */
    public void commit(final TableKey key,
                       final TableValue value,
                       final WriteOptions options) {
        table.lock(key);

        if(options != null) {
            table.commit(key, value, options.branch);
        }
        else {
            table.commit(key, value, null);
        }

        table.unlock(key);
    }

    /**
     * Delete the value from the database. This does not actually
     * remove the value from the database, but simply marks it for deletion.
     * The user must define a pruning option to actually get rid of the value.
     * Normally the vector clock associated with a value is sufficient to
     * identify the client, but since the delete operation does not have a
     * value parameter, an internal system clock supplies the client ID.
     *
     * @param key The key of the value to delete.
     */
    public void delete(final TableKey key) {
        // We must construct a new "delete" table value, and then
        // place this value along every single branch. In order
        // to implement this properly, we must first get the collapsed
        // values on every branch, and then perform a write on each
        // branch with the right vector clock. To do this atomically, we
        // must lock this specific key.
        table.lock(key);

        // Get all the value histories.
        Map<String,StreamIterator<TableValue>> memValues = table.getCollapsed(key);
        for(String branch : memValues.keySet()) {
            // Create a new "delete" value. The delete value will
            // also need a vector clock that is new enough.
            TableValue delete = TableValueFactory.newValue(TableValue.DELETE);
            delete.setClock(new VectorClock("sys".getBytes(), systemClock++));

            // Log the write into the WAL.
            if(wal != null) {
                wal.put(key, delete, null);
            }

            // Place the delete operation into the memtable.
            table.put(key, delete, branch, false);
        }
        table.unlock(key);

        // Check if we need to flush.
        if(table.shouldFlush()) {
            flush();
        }
    }

    /**
     * Place a new value into the database. This method assumes that the
     * client has defined the value vector clock already, and does not
     * have any specific writing options.
     *
     * @param key The key of the value.
     * @param value The value to place into the database.
     */
    public void put(final TableKey key, final TableValue value) {
        put(key, value, null);
    }

    /**
     * Place a new value into the database.
     *
     * @param key The key of the value.
     * @param value The value to place into the database.
     * @param options Write options that define how the
     * value is written to the memtable.
     */
    public void put(final TableKey key,
                    final TableValue value,
                    final WriteOptions options) {
        // Log the write into the WAL.
        if(wal != null) {
            wal.put(key, value, options);
        }

        if(options != null) {
            // Check if we need to insert into a specific branch.
            if(options.branch != null) {
                table.put(key, value, options.branch, options.tentative);
            }
            else {
                table.put(key, value, options.tentative);
            }
        }
        else {
            table.put(key, value, false);
        }

        // Check if we need to flush.
        if(table.shouldFlush()) {
            flush();
        }
    }

    /**
     * Apply multiple writes atomically.
     *
     * @param ops The batch write operations.
     */
    public boolean put(final WriteBatch ops) {
        // Lock all the keys associated with this batch.
        // This makes sure that we do not insert other
        // items while inserting the batch.
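        // The batch is applied in three passes: (1) create and lock every key,
        // (2) write each value with the lock-free commit() call, and
        // (3) unlock every key.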
        Iterator<TableKey> iter = ops.iterator();
        while(iter.hasNext()) {
            TableKey key = iter.next();

            // We might have to create the bucket before
            // locking it (otherwise it causes a lock error).
            table.create(key);
            table.lock(key);
        }

        // Perform all the actual writes. The memtable has a special
        // "commit" method that is similar to "put" except that it doesn't
        // perform any special locking.
        iter = ops.iterator();
        while(iter.hasNext()) {
            TableKey key = iter.next();

            for(WriteBatch.TableWrite write : ops.getValues(key)) {
                if(write.options == null) {
                    table.commit(key, write.value, null);
                }
                else {
                    table.commit(key, write.value, write.options.branch);
                }
            }
        }

        // We are all done so unlock all the keys.
        iter = ops.iterator();
        while(iter.hasNext()) {
            TableKey key = iter.next();
            table.unlock(key);
        }

        return true;
    }

    /**
     * Help retrieve the latest data.
     */
    private NavigableMap<String, StreamIterator<TableValue>> getHelper(final TableKey key,
                                                                       final ReadOptions options) {
        Map<String,StreamIterator<TableValue>> memValues = null;
        List<Map<String,StreamIterator<TableValue>>> ssValues;

        // Check if there are any valid options.
        if(options == null ||
           (options.branch == null && options.time == -1)) {
            // Get from all the sstables.
            ssValues = getFromSSTable(key, null, -1);

            // Get all the independent values associated with this key.
            memValues = table.getCollapsed(key);
        }
        else {
            if(options.branch != null) {
                // Get only the value associated with the branch.
                // There is only one value in this iterator.
                ssValues = getFromSSTable(key, options, 0);
                memValues = table.getCollapsed(key, options.branch);
            }
            else {
                // Then find all the values associated with that wall time.
                ssValues = getFromSSTable(key, null, options.time);
                memValues = table.getCollapsed(key, options.time);
            }
        }

        // Collect all the iterators.
        if(memValues != null) {
            ssValues.add(memValues);
        }

        // Now merge the independent branches.
        return SSTableService.collateBranches(ssValues, true, comparator);
    }

    /**
     * Get the latest independent values.
     *
     * @param key The key identifying the value.
     * @return An iterator over the latest independent values.
     */
    public Map<String, StreamIterator<TableValue>> get(final TableKey key) {
        return getHelper(key, null);
    }

    /**
     * Read the latest value along a specific branch.
     *
     * @param key The key identifying the value.
     * @param branch The branch to read from.
     * @return An iterator over the latest values on that branch,
     * or null if the key does not exist.
     **/
    public StreamIterator<TableValue> get(final TableKey key, final String branch) {
        Map<String,StreamIterator<TableValue>> values = null;

        // First try the memtable. If it is found here, then
        // we can stop searching since the memtable always has
        // the latest value.
        values = table.getCollapsed(key, branch);
        if(values != null) {
            return values.get(branch);
        }

        // Now search for the data in the sstables. However
        // we should search in level order.
        values = getLatestByLevel(key, branch);
        if(values != null) {
            return values.get(branch);
        }

        // Couldn't find it in the memtable or sstables. That
        // means it doesn't exist!
        return null;
    }

    /**
     * Read the latest independent values, filtered by wall time.
     *
     * @param key The key identifying the value.
     * @param time The wall time used to select values.
     * @return An iterator over the latest independent values.
     **/
    public Map<String, StreamIterator<TableValue>> get(final TableKey key, final long time) {
        // Create a new read option.
        ReadOptions options = new ReadOptions();
        options.time = time;

        return getHelper(key, options);
    }

    /**
     * Read the values associated with the list of keys. If the
     * user supplies read options, then we use those options to
     * synchronize the reads. Otherwise, we will use the latest values.
     *
     * @param keys List of keys identifying the values.
     * @param options Read options that specify clock constraints, ordering, etc.
     * @return A map associating the latest independent values with each key.
     */
    public Map<TableKey, Map<String, StreamIterator<TableValue>>> get(final List<TableKey> keys,
                                                                      final ReadOptions options) {
        Map<TableKey, Map<String, StreamIterator<TableValue>>> iters = new HashMap<>();

        // Apply the read option in the following manner:
        //
        // (0) If there are no options, then just run over the latest
        //     values from the keys.
        // (1) If the user has specified a branch, then we apply
        //     that branch to every key.
        // (2) If the user has specified a wall time, then we apply
        //     that time to every key.
        if(options == null) {
            for(TableKey k : keys) {
                iters.put(k, getHelper(k, null));
            }
        }
        else if(options.branch != null) {
            for(TableKey k : keys) {
                iters.put(k, getHelper(k, options));
            }
        }
        else if(options.time != -1) {
            for(TableKey k : keys) {
                iters.put(k, getHelper(k, options));
            }
        }

        return iters;
    }

    /**
     * Help retrieve historical values.
     *
     * @param key Table key
     * @param branch The branch whose history is requested, or null for all branches
     */
    private NavigableMap<String, StreamIterator<TableValue>> getHistoryHelper(final TableKey key,
                                                                              final String branch) {
        List<Map<String,StreamIterator<TableValue>>> values = new ArrayList<>();

        // Get the data from the memtable.
        Map<String,StreamIterator<TableValue>> value = null;
        if(branch != null) {
            value = table.getUncollapsed(key, branch);
        }
        else {
            value = table.getAll(key);
        }

        if(value != null) {
            values.add(value);
        }

        // Get the data from the sstables.
        diskService.lockForRead();
        Map<String, Integer> tables =
            diskService.getDataManifests(this, 0, SSTableService.MAX_LEVELS);
        for(String t : tables.keySet()) {
            SSTable ss = diskService.getSSTable(this, t, tables.get(t));

            if(ss != null) {
                value = null;

                if(ss.contains(key) && branch != null) {
                    value = ss.getUncollapsed(key, branch);
                }

                if(value != null) {
                    values.add(value);
                }
            }
        }
        diskService.unlockForRead();

        // Now merge all the histories.
        return SSTableService.collateBranches(values, false, comparator);
    }

    /**
     * Get the history of the values associated with the key across
     * all branches.
     *
     * @param key The key identifying the value.
     * @return Iterators over the history of values, keyed by branch.
     **/
    public Map<String, StreamIterator<TableValue>> getHistory(final TableKey key) {
        return getHistoryHelper(key, null);
    }

    /**
     * Get the history of the values associated with the key. The
     * branch is used to control which specific values are returned.
     *
     * @param key The key identifying the value.
     * @param branch The branch whose history should be returned.
     * @return An iterator over the history of values along the specified branch.
     **/
    public Iterator<TableValue> getHistory(final TableKey key, final String branch) {
        NavigableMap<String,StreamIterator<TableValue>> histories =
            getHistoryHelper(key, branch);

        return histories.get(branch);
    }

    /**
     * Iterate over all the keys in sorted order. Be warned that this
     * is an expensive operation since we need to scan all the sstables
     * in addition to the memtable.
     **/
    public Iterator<TableKey> iterator() {
        List<Iterator<? extends TableKey>> keys = new ArrayList<>();

        // Get the keys from the memtable.
        keys.add(table.getKeys());

        // Get the keys from the sstables.
        Map<String, Integer> tables =
            diskService.getDataManifests(this, 0, SSTableService.MAX_LEVELS);
        for(String t : tables.keySet()) {
            SSTable ss = diskService.getSSTable(this, t, tables.get(t));

            if(ss != null) {
                Iterator<TableKey> k = ss.getKeys();
                if(k != null) {
                    keys.add(k);
                }
            }
        }

        // Specify how to compare table keys.
        Comparator<TableKey> comp = new Comparator<TableKey>() {
            public int compare(TableKey k1, TableKey k2) {
                return k1.compareTo(k2);
            }
        };

        // Create a new merge iterator that will return all
        // the keys in sorted order.
        // return new UnionIterator<TableKey>(keys, comp);
        return new TreeUnionIterator<TableKey>(keys, comp);
    }

    /**
     * Fetch a value from a specific branch from the sstables. Because we want
     * the latest value, we can search in level order: if it is found in level 0,
     * then we don't need to look in level 1, etc. That is because things age in
     * level order.
     */
    private Map<String,StreamIterator<TableValue>> getLatestByLevel(final TableKey key,
                                                                    final String branch) {
        diskService.lockForRead();
        Map<String, Integer> tables =
            diskService.getDataManifests(this, 0, SSTableService.MAX_LEVELS);

        Map<Integer,List<String>> manifestByLevel = new TreeMap<>();
        for(String uuid : tables.keySet()) {
            Integer level = tables.get(uuid);

            List<String> sstables = manifestByLevel.get(level);
            if(sstables == null) {
                sstables = new ArrayList<>();
                manifestByLevel.put(level, sstables);
            }

            sstables.add(uuid);
        }

        List<Map<String,StreamIterator<TableValue>>> values = new ArrayList<>();
        for(Integer level : manifestByLevel.keySet()) {
            values.clear();

            for(String uuid : manifestByLevel.get(level)) {
                SSTable ss = diskService.getSSTable(this, uuid, level);

                if(ss != null && ss.contains(key)) {
                    Map<String, StreamIterator<TableValue>> value =
                        ss.getCollapsed(key, branch);

                    if(value != null && value.size() > 0) {
                        values.add(value);
                    }
                }
            }

            // Now see if we can collate these results.
            if(values.size() > 0) {
                diskService.unlockForRead();
                return SSTableService.collateBranches(values, false, comparator);
            }
        }

        diskService.unlockForRead();
        return null;
    }

    /**
     * Fetch a value from the sstables.
     **/
    private List<Map<String,StreamIterator<TableValue>>> getFromSSTable(final TableKey key,
                                                                        final ReadOptions options,
                                                                        final long time) {
        List<Map<String,StreamIterator<TableValue>>> bucket = new ArrayList<>();

        diskService.lockForRead();
        Map<String, Integer> tables =
            diskService.getDataManifests(this, 0, SSTableService.MAX_LEVELS);
        for(String t : tables.keySet()) {
            SSTable ss = diskService.getSSTable(this, t, tables.get(t));

            if(ss != null) {
                // First check if this sstable has this key.
                // This might result in a false positive, but we
                // check for improper iterators as well.
                if(ss.contains(key)) {
                    Map<String,StreamIterator<TableValue>> ssValues = null;

                    if(options != null && options.branch != null) {
                        ssValues = ss.getCollapsed(key, options.branch);
                    }
                    else if(options == null && time != -1) {
                        ssValues = ss.getCollapsed(key, time);
                    }
                    else {
                        ssValues = ss.getCollapsed(key);
                    }

                    if(ssValues != null) {
                        bucket.add(ssValues);
                    }
                }
            }
        }
        diskService.unlockForRead();

        return bucket;
    }
}