/*
############################################################################
##
## Copyright (C) 2006-2009 University of Utah. All rights reserved.
##
## This file is part of DeepPeep.
##
## This file may be used under the terms of the GNU General Public
## License version 2.0 as published by the Free Software Foundation
## and appearing in the file LICENSE.GPL included in the packaging of
## this file. Please review the following to ensure GNU General Public
## Licensing requirements will be met:
## http://www.opensource.org/licenses/gpl-license.php
##
## If you are unsure which license is appropriate for your use (for
## instance, you are interested in developing a commercial derivative
## of DeepPeep), please contact us at deeppeep@sci.utah.edu.
##
## This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
## WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
##
############################################################################
*/
package focusedCrawler.util.persistence;
import java.io.File;
import java.lang.reflect.Array;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import focusedCrawler.util.persistence.bdb.BerkeleyDBHashTable;
import focusedCrawler.util.persistence.rocksdb.RocksDBHashtable;
/**
 * A string-keyed table backed by an on-disk store (BerkeleyDB or RocksDB), with
 * an in-memory read cache and a write buffer in front of it.
 *
 * <p>Keys are URL-encoded (UTF-8) before they reach the cache, the write buffer
 * or the store. Writes are buffered and flushed to the store in batches, either
 * automatically once the buffer is full or explicitly through {@link #commit()}.
 *
 * @param <T> type of the values stored in the table
 */
public class PersistentHashtable<T> {

    /** Storage engines that can back the table. */
    enum DB {
        BERKELEYDB, ROCKSDB
    }

    private static final Logger logger = LoggerFactory.getLogger(PersistentHashtable.class);

    /** Number of buffered writes that triggers an automatic commit. */
    private final int tempMaxSize = 1000;

    private final HashtableDb<T> persistentTable;
    private final Cache<String, T> cache;

    /** Writes accepted by {@link #put} but not yet flushed to the store, in arrival order. */
    private List<Tuple<T>> tempList = new ArrayList<>(tempMaxSize);

    /**
     * Latest buffered value per (encoded) key. The cache may evict an entry
     * before it has been committed, so reads must be able to find buffered
     * values without relying on the cache.
     */
    private final Map<String, T> pendingValues = new HashMap<>();

    /**
     * Opens (creating the directory if necessary) a table backed by RocksDB.
     *
     * @param path         directory that holds the database files
     * @param cacheSize    maximum number of entries kept in the in-memory cache
     * @param contentClass class of the stored values
     */
    public PersistentHashtable(String path, int cacheSize, Class<T> contentClass) {
        this(path, cacheSize, contentClass, DB.ROCKSDB);
    }

    /**
     * Opens (creating the directory if necessary) a table backed by the given engine.
     *
     * @param path         directory that holds the database files
     * @param cacheSize    maximum number of entries kept in the in-memory cache
     * @param contentClass class of the stored values
     * @param backend      storage engine; anything other than {@code BERKELEYDB} selects RocksDB
     * @throws RuntimeException if the BerkeleyDB database cannot be opened
     */
    public PersistentHashtable(String path, int cacheSize, Class<T> contentClass, DB backend) {
        File file = new File(path);
        if (!file.exists()) {
            file.mkdirs();
        }
        this.cache = CacheBuilder.newBuilder().maximumSize(cacheSize).build();
        if (backend == DB.BERKELEYDB) {
            try {
                this.persistentTable = new BerkeleyDBHashTable<T>(file, contentClass);
            } catch (Exception e) {
                throw new RuntimeException("Failed to open BerkeleyDB database at " + path, e);
            }
        } else {
            this.persistentTable = new RocksDBHashtable<>(file.getPath(), contentClass);
        }
    }

    /**
     * Lists every entry of the table. Buffered writes are committed first so the
     * result reflects all previous {@link #put} calls.
     *
     * @return all key/value tuples held by the store
     * @throws RuntimeException if committing or listing fails
     */
    public List<Tuple<T>> getTable() {
        try {
            // Flush first, as iterator() does; otherwise buffered writes are missing.
            this.commit();
            return persistentTable.listElements();
        } catch (Exception e) {
            throw new RuntimeException("Failed to get hashtable values.", e);
        }
    }

    /**
     * Array variant of {@link #getTable()}.
     *
     * @return all key/value tuples held by the store, as an array
     * @deprecated Use method {@link #getTable()} instead.
     */
    @SuppressWarnings("unchecked")
    @Deprecated
    public Tuple<T>[] getTableAsArray() {
        List<Tuple<T>> table = getTable();
        // A generic array cannot be created directly; build it reflectively.
        return table.toArray((Tuple<T>[]) Array.newInstance(Tuple.class, table.size()));
    }

    /**
     * Looks up the value stored under {@code key}.
     *
     * <p>The cache is consulted first, then the buffer of uncommitted writes,
     * and finally the backing store.
     *
     * @param key the (unencoded) key
     * @return the value, or {@code null} if none was found or the lookup failed
     *         (failures are logged, not thrown)
     */
    public synchronized T get(String key) {
        try {
            key = URLEncoder.encode(key, "UTF-8");
            T obj = cache.getIfPresent(key);
            if (obj == null) {
                // The entry may have been evicted from the cache before being committed.
                obj = pendingValues.get(key);
            }
            if (obj == null) {
                obj = persistentTable.get(key);
            }
            return obj;
        } catch (Exception e) {
            logger.error("Failed to get key from hashtable.", e);
            return null;
        }
    }

    /**
     * Stores {@code value} under {@code key}. The write is cached and buffered;
     * once the buffer holds {@code tempMaxSize} or more entries it is committed.
     *
     * @param key   the (unencoded) key
     * @param value the value to store
     * @return {@code true} on success; {@code false} if encoding, caching or the
     *         automatic commit failed (the failure is logged)
     */
    public synchronized boolean put(String key, T value) {
        try {
            key = URLEncoder.encode(key, "UTF-8");
            cache.put(key, value);
            tempList.add(new Tuple<T>(key, value));
            pendingValues.put(key, value);
            // ">=" rather than "==": after a failed commit the buffer is already past
            // the threshold and an equality test would never trigger a flush again.
            if (tempList.size() >= tempMaxSize) {
                commit();
            }
            return true;
        } catch (Exception e) {
            logger.error("Failed to store item in persitent hashtable.", e);
            return false;
        }
    }

    /**
     * Flushes all buffered writes to the backing store. Does nothing when the
     * buffer is empty. On failure the buffer is left intact.
     *
     * @throws RuntimeException if the store rejects the batch
     */
    public synchronized void commit() {
        if (tempList.isEmpty()) {
            return;
        }
        try {
            persistentTable.put(tempList);
        } catch (Exception e) {
            throw new RuntimeException("Failed to commit persistent hashtable.", e);
        }
        // Start a fresh list instead of clearing, in case the store kept a reference to the batch.
        tempList = new ArrayList<>(tempMaxSize);
        pendingValues.clear();
    }

    /**
     * Commits buffered writes and closes the backing store. The store is closed
     * even when the final commit fails.
     *
     * @throws RuntimeException if the final commit fails
     */
    public synchronized void close() {
        try {
            this.commit();
        } finally {
            persistentTable.close();
        }
    }

    /**
     * Lists every entry of the table sorted by value. Buffered writes are
     * committed first.
     *
     * @param valueComparator ordering applied to the tuple values
     * @return all tuples, sorted according to {@code valueComparator}
     * @throws RuntimeException if committing, listing or sorting fails
     */
    public synchronized List<Tuple<T>> orderedSet(final Comparator<T> valueComparator) {
        try {
            // Flush first, as iterator() does; otherwise buffered writes are missing.
            this.commit();
            List<Tuple<T>> elements = persistentTable.listElements();
            Collections.sort(elements, new Comparator<Tuple<T>>() {
                @Override
                public int compare(Tuple<T> o1, Tuple<T> o2) {
                    return valueComparator.compare(o1.getValue(), o2.getValue());
                }
            });
            return elements;
        } catch (Exception e) {
            throw new RuntimeException("Failed to list elements from hashtable.", e);
        }
    }

    /**
     * Commits buffered writes and opens an iterator over the backing store.
     *
     * @return an iterator over all stored tuples
     * @throws RuntimeException if committing or opening the iterator fails
     */
    public TupleIterator<T> iterator() {
        try {
            this.commit();
            return persistentTable.iterator();
        } catch (Exception e) {
            throw new RuntimeException("Failed to open hashtable iterator.", e);
        }
    }
}