/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.databases.cooccurrenceDataBase;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.erasmusmc.collections.ComparatorFactory;
import org.erasmusmc.collections.IntList;
import org.erasmusmc.collections.SortedIntList2FloatMap;
import org.erasmusmc.collections.SortedIntList2IntMap;
import org.erasmusmc.collections.SortedListMap;
import org.erasmusmc.collections.SortedPair;
import org.erasmusmc.databases.BatchNumberAndIntegerIDBinding;
import org.erasmusmc.databases.BatchwiseIntegerID;
import org.erasmusmc.groundhog.ConceptToConceptVectorRecordIndexEntry;
import org.erasmusmc.groundhog.Groundhog;
import org.erasmusmc.ontology.ConceptVector;
import org.erasmusmc.ontology.ConceptVectorRecord;
import org.erasmusmc.storecaching.StoreMapCaching;
import org.erasmusmc.utilities.StringUtilities;
import com.sleepycat.bind.EntryBinding;
import com.sleepycat.bind.tuple.TupleBinding;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.DatabaseStats;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.LockMode;
/**
 * Persistent store of pairwise co-occurrence counts, kept in a Berkeley DB JE
 * database.
 *
 * <p>Each record is keyed by the smaller id of a pair; its value is a
 * {@link SortedIntList2IntMap} from the larger id of the pair to the count.
 * A pair of an id with itself is stored as well (see
 * {@link #parseConceptVector}), so {@code getCooccurrenceCount(x, x)} yields
 * the number of concept vectors in which {@code x} was seen.
 *
 * <p>Database errors are reported with {@code printStackTrace()} and
 * otherwise swallowed; callers are not notified of failures.
 */
public class CooccurrenceDatabase extends StoreMapCaching<Integer, SortedIntList2IntMap> {

  /** Extracts the leaf-node count from the textual form of the database statistics. */
  private static final Pattern LEAF_NODE_COUNT = Pattern.compile("numLeafNodes=([0-9]+)");

  /** Name of the temporary database used while indexing in batches. */
  private static final String TEMP_DATABASE_NAME = "temp";

  /** Changing this name will render older databases unreadable. */
  protected String databaseName = "CooccurrenceDatabase";

  /**
   * Number of concept vector records accumulated in memory before they are
   * flushed to the temporary database. Higher values lead to quicker
   * re-indexing but more memory usage.
   */
  protected int reindexBatchSize = 500000;

  protected Environment environment;
  protected DatabaseConfig databaseConfig;
  protected Database cooccurrenceDB;
  protected EntryBinding myIntegerBinding;
  protected EntryBinding myDataBinding;
  protected EnvironmentConfig environmentConfig;
  protected TupleBinding tempkeyBinding = null;
  protected CooccurrenceDatabaseShutdown sh;

  /**
   * Opens (or creates) the database in the given folder. The folder, including
   * any missing parent folders, is created when it does not exist yet.
   *
   * @param foldername path of the environment home directory
   */
  public CooccurrenceDatabase(String foldername) {
    File datadir = new File(foldername);
    if (!datadir.exists()) {
      // mkdirs() rather than mkdir(): a nested path must not fail just
      // because an intermediate folder is missing.
      datadir.mkdirs();
    }
    init(datadir);
  }

  /**
   * Opens (or creates) the database in the given directory. The directory is
   * not created by this constructor.
   *
   * @param datadir environment home directory
   */
  public CooccurrenceDatabase(File datadir) {
    init(datadir);
  }

  /**
   * Sets up the environment, opens the database, creates the bindings and
   * registers a JVM shutdown hook that closes the database.
   */
  private void init(File datadir) {
    try {
      environmentConfig = new EnvironmentConfig();
      environmentConfig.setAllowCreate(true);
      environmentConfig.setTransactional(true);
      environmentConfig.setCacheSize(30240000);
      environment = new Environment(datadir, environmentConfig);

      databaseConfig = new DatabaseConfig();
      databaseConfig.setAllowCreate(true);
      databaseConfig.setTransactional(true);
      openDB();

      myIntegerBinding = TupleBinding.getPrimitiveBinding(Integer.class);
      myDataBinding = new Integer2Integer2IntegerMapBinding();

      sh = new CooccurrenceDatabaseShutdown();
      sh.c = this;
      Runtime.getRuntime().addShutdownHook(sh);
    } catch (DatabaseException e) {
      e.printStackTrace();
    }
  }

  /** Creates an empty in-memory co-occurrence map ordered by ascending id. */
  private static SortedListMap<Integer, SortedIntList2IntMap> newCoocMap() {
    return new SortedListMap<Integer, SortedIntList2IntMap>(ComparatorFactory.getAscendingIntegerComparator());
  }

  /**
   * Adds one to the count of every pair of ids in the concept vector,
   * including the pair of each id with itself.
   *
   * @param conceptVector vector whose keys are the co-occurring ids
   * @param coocMap accumulator, keyed by the smaller id of each pair
   */
  private void parseConceptVector(ConceptVector conceptVector, SortedListMap<Integer, SortedIntList2IntMap> coocMap) {
    SortedIntList2FloatMap vectorValues = conceptVector.values;
    int entryCount = vectorValues.size();
    for (int first = 0; first < entryCount; first++) {
      int id1 = vectorValues.getKey(first);
      // Starting at 'first' counts the self-pair and visits each pair once.
      for (int second = first; second < entryCount; second++) {
        int id2 = vectorValues.getKey(second);
        int small = Math.min(id1, id2);
        int big = Math.max(id1, id2);
        SortedIntList2IntMap partners = coocMap.get(small);
        if (partners == null) {
          partners = new SortedIntList2IntMap();
          coocMap.put(small, partners);
        }
        Integer previous = partners.get(big);
        partners.put(big, previous == null ? 1 : previous + 1);
      }
    }
  }

  /**
   * Builds the co-occurrence database from all records of the groundhog.
   * When the groundhog holds more than {@link #reindexBatchSize} records the
   * work is done in batches through a temporary database; otherwise
   * everything is accumulated in memory and written in one pass.
   *
   * @param groundhog source of the concept vector records
   */
  public void makeFromGroundhog(Groundhog groundhog) {
    int size = groundhog.size();
    if (size > reindexBatchSize) {
      batchwiseIndexing(size, groundhog);
      return;
    }

    SortedListMap<Integer, SortedIntList2IntMap> coocMap = newCoocMap();
    System.out.println("Start iteration: " + StringUtilities.now());
    Iterator<ConceptVectorRecord> iterator = groundhog.getIterator();
    int counter = 0;
    while (iterator.hasNext()) {
      ConceptVectorRecord cvr = iterator.next();
      if ((counter % 100000) == 0) {
        System.out.println(100 * (double) counter / size + "%\t" + StringUtilities.now());
      }
      parseConceptVector(cvr.getConceptVector(), coocMap);
      counter++;
    }

    Iterator<SortedListMap<Integer, SortedIntList2IntMap>.MapEntry<Integer,SortedIntList2IntMap>> entryIt = coocMap.entryIterator();
    while (entryIt.hasNext()) {
      SortedListMap<Integer, SortedIntList2IntMap>.MapEntry<Integer,SortedIntList2IntMap> entry = entryIt.next();
      setEntryInStore(entry.getKey(), entry.getValue());
    }
    System.out.println("done" + StringUtilities.now());
  }

  /**
   * Indexes the groundhog in batches of {@link #reindexBatchSize} records.
   * Each batch is written to a temporary database and the batches are merged
   * into the main database at the end. The temporary database is closed and
   * removed even when indexing fails part-way.
   */
  private void batchwiseIndexing(int size, Groundhog groundhog) {
    Database temp = null;
    try {
      temp = environment.openDatabase(null, TEMP_DATABASE_NAME, databaseConfig);
      tempkeyBinding = new BatchNumberAndIntegerIDBinding();
      // For every key: the batch numbers in which that key was written.
      Map<Integer, IntList> batchHistory = new HashMap<Integer, IntList>();
      SortedListMap<Integer, SortedIntList2IntMap> coocMap = newCoocMap();
      int batchNumber = 0;
      int counter = 0;

      Iterator<ConceptVectorRecord> iterator = groundhog.getIterator();
      while (iterator.hasNext()) {
        ConceptVectorRecord cvr = iterator.next();
        parseConceptVector(cvr.getConceptVector(), coocMap);
        counter++;
        if (counter % reindexBatchSize == 0) {
          processToTempStore(coocMap, temp, batchNumber, batchHistory);
          coocMap = newCoocMap();
          batchNumber++;
          double fraction = 100d * (double) counter / (double) size;
          System.out.println(fraction + "%");
        }
      }

      // Flush whatever is left after the last complete batch.
      processToTempStore(coocMap, temp, batchNumber, batchHistory);
      double fraction = 100d * (double) counter / (double) size;
      System.out.println(fraction + "%");

      mergeBatchIndex(batchHistory, temp);
    } catch (DatabaseException e) {
      e.printStackTrace();
    } finally {
      if (temp != null) {
        try {
          temp.close();
          environment.removeDatabase(null, TEMP_DATABASE_NAME);
        } catch (DatabaseException e) {
          e.printStackTrace();
        }
      }
    }
  }

  /**
   * Writes one batch to the temporary database under (key, batch) keys and
   * records in {@code batchHistory} which batches contain each key.
   */
  private void processToTempStore(SortedListMap<Integer, SortedIntList2IntMap> coocMap, Database temp, Integer batch, Map<Integer, IntList> batchHistory) {
    try {
      Iterator<SortedListMap<Integer, SortedIntList2IntMap>.MapEntry<Integer,SortedIntList2IntMap>> entryIt = coocMap.entryIterator();
      while (entryIt.hasNext()) {
        SortedListMap<Integer, SortedIntList2IntMap>.MapEntry<Integer,SortedIntList2IntMap> entry = entryIt.next();
        Integer key = entry.getKey();

        DatabaseEntry databaseKey = new DatabaseEntry();
        tempkeyBinding.objectToEntry(new BatchwiseIntegerID(key, batch), databaseKey);
        DatabaseEntry databaseValue = new DatabaseEntry();
        myDataBinding.objectToEntry(entry.getValue(), databaseValue);
        temp.put(null, databaseKey, databaseValue);

        IntList batchesOfKey = batchHistory.get(key);
        if (batchesOfKey == null) {
          batchesOfKey = new IntList();
          batchHistory.put(key, batchesOfKey);
        }
        batchesOfKey.add(batch);
      }
    } catch (DatabaseException e) {
      e.printStackTrace();
    }
  }

  /**
   * Sums, for every key, the partial maps of all batches that contain the key
   * and writes the merged map to the main database.
   */
  private void mergeBatchIndex(Map<Integer, IntList> batchHistory, Database temp) {
    try {
      System.out.println("Merging batch index: ");
      int total = batchHistory.size();
      int processed = 0;
      for (Map.Entry<Integer, IntList> history : batchHistory.entrySet()) {
        processed++;
        Integer cui = history.getKey();
        List<Integer> batches = history.getValue();
        SortedIntList2IntMap merged = new SortedIntList2IntMap();
        for (Integer batch : batches) {
          DatabaseEntry databaseKey = new DatabaseEntry();
          tempkeyBinding.objectToEntry(new BatchwiseIntegerID(cui, batch), databaseKey);
          DatabaseEntry databaseValue = new DatabaseEntry();
          temp.get(null, databaseKey, databaseValue, LockMode.DEFAULT);
          if (databaseValue.getData() == null) {
            // The lookup returned nothing (for instance because writing this
            // batch failed earlier); there is nothing to deserialize.
            System.err.println("Missing temporary entry for " + cui + " in batch " + batch);
            continue;
          }
          SortedIntList2IntMap addition = (SortedIntList2IntMap) myDataBinding.entryToObject(databaseValue);
          Iterator<SortedIntList2IntMap.MapEntry> it = addition.entryIterator();
          while (it.hasNext()) {
            SortedIntList2IntMap.MapEntry batchEntry = it.next();
            Integer val = merged.get(batchEntry.getKey());
            if (val == null) {
              val = batchEntry.getValue();
            } else {
              val += batchEntry.getValue();
            }
            merged.put(batchEntry.getKey(), val);
          }
        }
        setEntryInStore(cui, merged);
        if (processed % 10000 == 0) {
          System.out.println("Entry: " + cui + "\t" + 100d * processed / total + "%");
        }
      }
    } catch (DatabaseException e) {
      e.printStackTrace();
    }
  }

  /**
   * Verifies the database against the groundhog: for every concept in the
   * groundhog's concept-to-record index, the number of records listed must
   * equal the stored count of the concept paired with itself. Mismatches are
   * printed to standard output.
   *
   * @param groundhog groundhog the database was built from
   */
  public void checkDB(Groundhog groundhog) {
    Iterator<ConceptToConceptVectorRecordIndexEntry> iterator = groundhog.getConceptToRecordIndexIterator();
    System.out.println("Checking coocdb " + StringUtilities.now());
    while (iterator.hasNext()) {
      ConceptToConceptVectorRecordIndexEntry entry = iterator.next();
      // Primitive ints: comparing boxed Integers with != compares references
      // and reports equal values above the Integer cache range as different.
      int keyfreq = entry.conceptVectorRecordIDs.size();
      int keyfreq2 = getCooccurrenceCount(entry.key, entry.key);
      if (keyfreq != keyfreq2) {
        System.out.println("Error for entry " + entry.key + "; in groundhog: " + keyfreq + ", in coocdb: " + keyfreq2);
      }
    }
    System.out.println("Done checking coocdb");
  }

  /** Opens the database named {@link #databaseName} in the current environment. */
  public void openDB() {
    try {
      this.cooccurrenceDB = environment.openDatabase(null, this.databaseName, this.databaseConfig);
    } catch (DatabaseException e) {
      e.printStackTrace();
    }
  }

  /**
   * Returns the stored count for a pair of ids; the order of the arguments
   * does not matter.
   *
   * @param cui1 first id, may be {@code null}
   * @param cui2 second id, may be {@code null}
   * @return the count, or 0 when either id is {@code null} or no count is
   *         stored for the pair
   */
  public int getCooccurrenceCount(Integer cui1, Integer cui2) {
    if (cui1 == null || cui2 == null) {
      return 0;
    }
    // Records are keyed by the smaller id of the pair.
    Integer smaller = cui1;
    Integer bigger = cui2;
    if (smaller > bigger) {
      smaller = cui2;
      bigger = cui1;
    }
    SortedIntList2IntMap partners = get(smaller);
    if (partners == null) {
      return 0;
    }
    Integer count = partners.get(bigger);
    return count == null ? 0 : count;
  }

  /**
   * Returns the stored count for the two ids of the pair.
   *
   * @param integerPair pair of ids
   * @return the count, or 0 when none is stored
   */
  public int getCooccurrenceCount(SortedPair<Integer> integerPair) {
    return getCooccurrenceCount(integerPair.getObject1(), integerPair.getObject2());
  }

  /**
   * Looks up the count of every pair in the list.
   *
   * @param ids pairs to look up
   * @return the counts, in the same order as {@code ids}
   */
  public List<Integer> getBatch(List<SortedPair<Integer>> ids) {
    List<Integer> result = new ArrayList<Integer>(ids.size());
    for (SortedPair<Integer> integerPair : ids) {
      result.add(getCooccurrenceCount(integerPair));
    }
    return result;
  }

  /**
   * Fetches the records of several ids straight from the database, one lookup
   * per id.
   *
   * <p>NOTE(review): the base-class contract for ids without a record is not
   * visible from this file; such ids (and {@code null} ids) are left out of
   * the result -- confirm against {@code StoreMapCaching}.
   *
   * @param ids ids to fetch, may be {@code null}
   * @return map from id to its record for every id that has one; never
   *         {@code null}
   */
  @Override
  protected Map<Integer, SortedIntList2IntMap> getEntriesFromStoreWithIDs(Collection<Integer> ids) {
    Map<Integer, SortedIntList2IntMap> found = new HashMap<Integer, SortedIntList2IntMap>();
    if (ids == null) {
      return found;
    }
    for (Integer id : ids) {
      if (id == null) {
        continue;
      }
      SortedIntList2IntMap value = getEntryFromStoreWithID(id);
      if (value != null) {
        found.put(id, value);
      }
    }
    return found;
  }

  /**
   * Reads the record of one id straight from the database.
   *
   * @param id key of the record
   * @return the record, or {@code null} when the lookup yields no data or
   *         fails
   */
  @Override
  protected SortedIntList2IntMap getEntryFromStoreWithID(Integer id) {
    SortedIntList2IntMap result = null;
    try {
      DatabaseEntry databaseKey = new DatabaseEntry();
      DatabaseEntry databaseValue = new DatabaseEntry();
      myIntegerBinding.objectToEntry(id, databaseKey);
      cooccurrenceDB.get(null, databaseKey, databaseValue, LockMode.DEFAULT);
      // An entry that was never filled has size 0: treat it as "no record".
      if (databaseValue.getSize() != 0) {
        result = (SortedIntList2IntMap) myDataBinding.entryToObject(databaseValue);
      }
    } catch (DatabaseException e) {
      e.printStackTrace();
    }
    return result;
  }

  /**
   * Writes the record of one id to the database, replacing any existing one.
   *
   * @param id key of the record
   * @param value record to store
   */
  @Override
  protected void setEntryInStore(Integer id, SortedIntList2IntMap value) {
    try {
      DatabaseEntry databaseKey = new DatabaseEntry();
      myIntegerBinding.objectToEntry(id, databaseKey);
      DatabaseEntry databaseValue = new DatabaseEntry();
      myDataBinding.objectToEntry(value, databaseValue);
      cooccurrenceDB.put(null, databaseKey, databaseValue);
    } catch (DatabaseException e) {
      e.printStackTrace();
    }
  }

  /**
   * Returns the {@code numLeafNodes} figure found in the textual form of the
   * database statistics.
   *
   * @return that figure, or 0 when it is absent or the statistics cannot be
   *         read
   */
  @Override
  public int size() {
    int size = 0;
    try {
      DatabaseStats stats = cooccurrenceDB.getStats(null);
      Matcher m = LEAF_NODE_COUNT.matcher(stats.toString());
      if (m.find()) {
        size = Integer.parseInt(m.group(1));
      }
    } catch (DatabaseException e) {
      e.printStackTrace();
    }
    return size;
  }

  /**
   * Closes the database and then the environment. Safe to call more than
   * once: both the finalizer and the shutdown hook may end up here, possibly
   * on different threads. A failure in one step does not prevent the next.
   */
  private synchronized void closeDatabase() {
    if (cooccurrenceDB != null) {
      try {
        cooccurrenceDB.close();
      } catch (DatabaseException e) {
        e.printStackTrace();
      } finally {
        cooccurrenceDB = null;
      }
    }
    if (environment != null) {
      try {
        environment.cleanLog(); // Clean the log before closing
      } catch (DatabaseException e) {
        e.printStackTrace();
      }
      try {
        environment.close();
      } catch (DatabaseException e) {
        e.printStackTrace();
      } finally {
        environment = null;
      }
    }
  }

  /** Closes the database and unregisters the shutdown hook. */
  @Override
  protected void finalize() {
    closeDatabase();
    if (sh != null) {
      try {
        Runtime.getRuntime().removeShutdownHook(sh);
      } catch (IllegalStateException ignored) {
        // The JVM is already shutting down, so the hook can no longer be
        // removed; closeDatabase() is idempotent, so that is harmless.
      }
    }
  }

  /** Shutdown hook that closes the database it was given. */
  protected class CooccurrenceDatabaseShutdown extends Thread {
    public CooccurrenceDatabase c;

    @Override
    public void run() {
      if (c != null) {
        c.closeDatabase();
      }
    }
  }
}