package org.opendedup.sdfs.filestore;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.concurrent.locks.ReentrantLock;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.opendedup.collections.CSByteArrayLongMap;
import org.opendedup.collections.HashtableFullException;
import org.opendedup.sdfs.Main;
import org.opendedup.util.HashFunctions;
import org.opendedup.util.StringUtils;
/**
*
* @author Sam Silverberg
*
* The TCHashStore stores data locations within the ChunkStore service.
* Dedupe data is stored within a chunkstore but it is referenced within
* the TCHashStore for lookup and retrieval. Meta-data associated with
* the TCHashStore. The hashstore implements the B-Tree table within the
* TokyoCabinet library. Data is indexed by the hash and is stored as a
* serialized PersistantDedupChunk. The TCHashStore is located on disk
* based on @see com.annesam.sdfs.Main#hashDBStore.
*
* ChuckStore Service communication flows as follows:
*
* sdfs client <-TCP-> ClientThread <-> HashChunkService <->TCHashStore
* <->AbstractChunkStore
*
*/
public class HashStore {
// A lookup table for the specific hash store based on the first byte of the
// hash.
CSByteArrayLongMap bdb = null;
// the name of the hash store. This is usually associate with the first byte
// of all possible hashes. There should
// be 256 total hash stores.
private String name;
// Lock for hash queries
private ReentrantLock hashlock = new ReentrantLock();
// The chunk store used to store the actual deduped data;
//private AbstractChunkStore chunkStore = null;
// Instanciates a FileChunk store that is shared for all instances of
// hashstores.
// private static ChunkStoreGCScheduler gcSched = new
// ChunkStoreGCScheduler();
private static Logger log = Logger.getLogger("sdfs");
private boolean closed = true;
private static byte [] blankHash = null;
private static byte [] blankData = null;
static {
blankData = new byte [Main.chunkStorePageSize];
try {
blankHash = HashFunctions.getTigerHashBytes(blankData);
} catch (Exception e) {
log.log(Level.SEVERE,"unable to hash blank hash",e);
}
}
/**
* Instantiates the TC hash store.
*
* @param name
* the name of the hash store.
* @throws IOException
*/
public HashStore() throws IOException {
this.name = "sdfs";
try {
this.connectDB();
} catch (Exception e) {
e.printStackTrace();
}
//this.initChunkStore();
log.info("Total Entries " + + bdb.getSize());
log.info("Added " + this.name);
this.closed = false;
}
/**
*
* @return the total number of entries stored in this database
*
*/
public long getEntries() {
return bdb.getSize();
}
public long getMaxEntries() {
return this.bdb.getMaxSize();
}
/**
* Initiates the chunkstore. It will create a S3 chunk store per HashStore
* if AWS is enabled. Otherwise it will use the default ChunkStore @see
* FileChunkStore.
*
* @throws IOException
*/
/*
private void initChunkStore() throws IOException {
if (Main.AWSChunkStore)
chunkStore = new S3ChunkStore(this.getName());
else
chunkStore = fileStore;
}*/
/**
* returns the name of the TCHashStore
*
* @return the name of the hash store
*/
public String getName() {
return name;
}
/**
*method used to determine if the hash already exists in the database
*
* @param hash
* the md5 or sha hash to lookup
* @return returns true if the hash already exists.
* @throws IOException
*/
public boolean hashExists(byte[] hash) throws IOException {
return this.bdb.containsKey(hash);
}
/**
* The method used to open and connect to the TC database.
*
* @throws IOException
* @throws HashtableFullException
*/
private void connectDB() throws IOException, HashtableFullException {
File directory = new File(Main.hashDBStore + File.separator);
if (!directory.exists())
directory.mkdirs();
File dbf = new File(directory.getPath() + File.separator + "hashstore-"
+ this.getName());
long entries = ((Main.chunkStoreAllocationSize / (long) Main.chunkStorePageSize)) + 8000;
bdb = new CSByteArrayLongMap(entries, (short) Main.hashLength, dbf
.getPath());
}
/**
* A method to return a chunk from the hash store.
*
* @param hash
* the md5 or sha hash to store
* @return a hashchunk or null if the hash is not in the database.
*/
public HashChunk getHashChunk(byte[] hash) throws IOException {
HashChunk hs = null;
try {
byte[] data = bdb.getData(hash);
if(data == null && Arrays.equals(hash,blankHash)) {
log.info("found blank data request");
hs = new HashChunk(hash, 0, blankData.length, blankData, false);
}
hs = new HashChunk(hash, 0, data.length, data, false);
} catch (Exception e) {
log.log(Level.SEVERE, "unable to get hash "
+ StringUtils.getHexString(hash), e);
} finally {
// hashlock.unlock();
}
return hs;
}
public void processHashClaims() throws IOException {
this.bdb.claimRecords();
}
public void evictChunks(long time) throws IOException {
this.bdb.removeRecords(time);
}
/**
* Adds a block of data to the TC hash store and the chunk store.
*
* @param chunk
* the chunk to persist
* @return true returns true if the data was written. Data will not be
* written if the hash already exists in the db.
* @throws IOException
* @throws HashtableFullException
*/
public boolean addHashChunk(HashChunk chunk) throws IOException, HashtableFullException {
boolean written = false;
if(!bdb.containsKey(chunk.getName())){
try {
//long start = chunkStore.reserveWritePosition(chunk.getLen());
ChunkData cm = new ChunkData(chunk.getName(),
Main.chunkStorePageSize, chunk.getData());
if (bdb.put(cm)) {
written = true;
} else {
}
} catch (IOException e) {
log.log(Level.SEVERE, "Unable to commit chunk "
+ StringUtils.getHexString(chunk.getName()), e);
throw e;
} catch (HashtableFullException e) {
log.log(Level.SEVERE, "Unable to commit chunk "
+ StringUtils.getHexString(chunk.getName()), e);
throw e;
}
finally {
if (hashlock.isLocked())
hashlock.unlock();
}
}
return written;
}
/**
* Closes the hash store. The hash store should always be closed.
*
*
*/
public void close() {
this.closed = true;
try {
bdb.close();
bdb = null;
} catch (Exception e) {
}
}
public boolean isClosed() {
return this.closed;
}
}