/*
 * Concept profile generation tool suite
 * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
 *  Rotterdam, The Netherlands
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>
 */

package org.erasmusmc.groundhog;

import java.io.Serializable;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.erasmusmc.math.vector.VectorCursor;
import org.erasmusmc.ontology.ConceptVectorRecord;
import org.erasmusmc.storecaching.StoreMapCaching;
import org.erasmusmc.utilities.StringUtilities;

import com.sleepycat.bind.EntryBinding;
import com.sleepycat.bind.tuple.TupleBinding;
import com.sleepycat.je.Cursor;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.DatabaseStats;
import com.sleepycat.je.Environment;
import com.sleepycat.je.LockMode;
import com.sleepycat.je.OperationStatus;

/**
 * Index that maps a concept id to a {@link ConceptToConceptVectorRecordIndexEntry},
 * persisted in a Berkeley DB JE database named by {@link #dbName}.
 *
 * <p>Lookups go through the cache helpers called in {@link #get(Integer)}
 * ({@code getFromCache} / {@code fetch}), which are not defined in this file and are
 * presumably inherited from {@link StoreMapCaching}.
 *
 * <p>All {@link DatabaseException}s raised by the store are caught inside this class and
 * reported with {@code printStackTrace()}; callers only see default/partial results.
 */
public class ConceptToRecordIndex extends StoreMapCaching<Integer, ConceptToConceptVectorRecordIndexEntry>
    implements Serializable {

  private static final long serialVersionUID = 6755259561426980631L;

  /** Extracts the leaf node count from the textual form of the database statistics. */
  private static final Pattern NUM_LEAF_NODES_PATTERN = Pattern.compile("numLeafNodes=([0-9]+)");

  protected Environment environment;
  protected Database conceptToConceptVectorIndexStore;
  protected DatabaseConfig databaseConfig;
  /** Binding used for the Integer keys of the store. */
  protected EntryBinding myIntegerBinding;
  /** Binding used for the index entries stored as values. */
  protected EntryBinding myDataBinding;
  protected String dbName = "CUI_2_FPUI_INDEX";
  /** Not initialized or used by the live code of this class. */
  protected Set<Integer> indexedConcepts;
  /**
   * Keys visited by {@link #mergeDuplicateEntries()}. Never initialized by this class;
   * a subclass or other code has to assign it.
   */
  protected Set<Integer> indexedConceptsWithDuplicates;
  /** When true, cached lookups and incremental index updates are disabled. */
  protected Boolean useBulkImportMode = false;
  protected Comparator<Integer> comparator;

  /**
   * Creates the index and opens (creating it if necessary) the backing database in the
   * given environment. The database is configured as non-transactional.
   *
   * @param environment the JE environment that holds the database
   * @param comparator comparator handed to the {@link ConceptToRecordIndexEntryBinding}
   * @throws DatabaseException declared for callers; failures while opening the database
   *         are caught in {@link #openDB()}
   */
  public ConceptToRecordIndex(Environment environment, Comparator<Integer> comparator) throws DatabaseException {
    this.comparator = comparator;
    databaseConfig = new DatabaseConfig();
    databaseConfig.setAllowCreate(true);
    databaseConfig.setTransactional(false);
    // Sorted duplicates are intentionally left disabled here
    // (databaseConfig.setSortedDuplicates(true) was switched off).
    this.environment = environment;
    openDB();
    myIntegerBinding = TupleBinding.getPrimitiveBinding(Integer.class);
    myDataBinding = new ConceptToRecordIndexEntryBinding(comparator);
  }

  /**
   * Opens the database {@link #dbName} with {@link #databaseConfig}. On failure the
   * exception is printed and the store reference is left unchanged.
   */
  public void openDB() {
    try {
      this.conceptToConceptVectorIndexStore =
          environment.openDatabase(null, this.dbName, this.databaseConfig);
    } catch (DatabaseException e) {
      e.printStackTrace();
    }
  }

  /**
   * Scans the whole store and collects, per concept, the number of records it occurs in
   * (document frequency) and the summed values (term frequency), plus the overall sum.
   *
   * @return the collected statistics; partially filled if the scan failed half-way
   */
  public GroundhogStatistics getGroundhogStatistics() {
    GroundhogStatistics wholeGroundhogStatistics = new GroundhogStatistics();
    Cursor myCursor = null;
    try {
      myCursor = conceptToConceptVectorIndexStore.openCursor(null, null);
      DatabaseEntry foundKey = new DatabaseEntry();
      DatabaseEntry foundData = new DatabaseEntry();
      while (myCursor.getNext(foundKey, foundData, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
        ConceptToConceptVectorRecordIndexEntry entry =
            (ConceptToConceptVectorRecordIndexEntry) myDataBinding.entryToObject(foundData);
        Integer key = (Integer) myIntegerBinding.entryToObject(foundKey);
        ConceptStatistic conceptStatistic = new ConceptStatistic();
        conceptStatistic.docFrequency = entry.conceptVectorRecordIDs.size();
        conceptStatistic.termFrequency = entry.sumOfValuesInRecords.intValue();
        wholeGroundhogStatistics.conceptStatistics.put(key, conceptStatistic);
        wholeGroundhogStatistics.allConceptOccurrences += entry.sumOfValuesInRecords;
      }
    } catch (DatabaseException e) {
      e.printStackTrace();
    } finally {
      // Close the cursor on the failure path as well, so it is never leaked.
      closeCursorQuietly(myCursor);
    }
    return wholeGroundhogStatistics;
  }

  /**
   * Empties the index: closes the database, truncates it, reopens it and clears
   * {@code index}.
   *
   * <p>Be careful: the index has to be rebuilt explicitly if the record store has not
   * been cleared as well.
   */
  public void clearIndex() {
    try {
      conceptToConceptVectorIndexStore.close();
      // Runs without a transaction; the database is non-transactional.
      environment.truncateDatabase(null, dbName, false);
      openDB();
      index.clear();
    } catch (DatabaseException e) {
      e.printStackTrace();
    }
  }

  /**
   * Writes every entry of the given map straight to the store, bypassing the cache.
   * Assumes ascending ids.
   *
   * @param processedRecordsMap concept id to index entry; must not be null
   */
  public void addProcessedRecordsMapToStore(Map<Integer, ConceptToConceptVectorRecordIndexEntry> processedRecordsMap) {
    // Tracking of indexedConcepts / indexedConceptsWithDuplicates is currently disabled.
    for (Map.Entry<Integer, ConceptToConceptVectorRecordIndexEntry> entry : processedRecordsMap.entrySet()) {
      setEntryInStore(entry.getKey(), entry.getValue());
    }
  }

  /**
   * For every key in {@link #indexedConceptsWithDuplicates} that has more than one value
   * in the store, folds the duplicate values into the first one, deletes the duplicates
   * and writes the merged entry back.
   *
   * <p>Does nothing besides logging when {@link #indexedConceptsWithDuplicates} has not
   * been set.
   */
  public void mergeDuplicateEntries() {
    Cursor myCursor = null;
    try {
      System.out.println("Starting to merge duplicate entries in the Database " + StringUtilities.now());
      myCursor = conceptToConceptVectorIndexStore.openCursor(null, null);
      DatabaseEntry searchKey = new DatabaseEntry();
      DatabaseEntry foundData = new DatabaseEntry();
      // The set is never assigned in this class; guard against it still being null.
      if (indexedConceptsWithDuplicates != null) {
        for (Integer key : indexedConceptsWithDuplicates) {
          myIntegerBinding.objectToEntry(key, searchKey);
          if (myCursor.getSearchKey(searchKey, foundData, null) != OperationStatus.SUCCESS) {
            continue;
          }
          if (myCursor.count() <= 1) {
            continue;
          }
          ConceptToConceptVectorRecordIndexEntry entry =
              (ConceptToConceptVectorRecordIndexEntry) myDataBinding.entryToObject(foundData);
          // The first value is kept; every following duplicate is merged in and deleted.
          while (myCursor.getNextDup(searchKey, foundData, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
            ConceptToConceptVectorRecordIndexEntry addition =
                (ConceptToConceptVectorRecordIndexEntry) myDataBinding.entryToObject(foundData);
            entry.conceptVectorRecordIDs.addAll(addition.conceptVectorRecordIDs);
            entry.sumOfValuesInRecords += addition.sumOfValuesInRecords;
            myCursor.delete();
          }
          setEntryInStore(key, entry);
        }
      }
      System.out.println("Done merging duplicate entries in the Database " + StringUtilities.now());
    } catch (DatabaseException e) {
      e.printStackTrace();
    } finally {
      // Close the cursor on the failure path as well, so it is never leaked.
      closeCursorQuietly(myCursor);
    }
  }

  /**
   * Adds the record's id and value to the index entry of every nonzero dimension of its
   * concept vector, creating entries that do not exist yet. Record ids already present
   * in an entry are not added again.
   *
   * <p>Only prints a message when bulk import mode is active.
   *
   * @param record the record to index
   */
  public void addFingerprintRecordToIndex(ConceptVectorRecord record) {
    VectorCursor<Integer> cursor = record.getConceptVector().getNonzeroCursor();
    if (useBulkImportMode) {
      System.out.println("addRecordToIndex should never be called in bulkimportmode!");
      return;
    }
    while (cursor.isValid()) {
      ConceptToConceptVectorRecordIndexEntry entry = get(cursor.dimension());
      if (entry == null) {
        entry = new ConceptToConceptVectorRecordIndexEntry(record.getID(), cursor.get());
      } else if (!entry.conceptVectorRecordIDs.contains(record.getID())) {
        entry.addRecordData(record.getID(), cursor.get());
      }
      set(cursor.dimension(), entry);
      cursor.next();
    }
  }

  /**
   * Removes the record's id and value from the index entry of every nonzero dimension of
   * its concept vector, for the entries that contain it.
   *
   * @param record the record to remove from the index
   * @return {@code false} when bulk import mode is active (nothing is done),
   *         {@code true} otherwise
   */
  public boolean removeConceptVectorRecordFromIndex(ConceptVectorRecord record) {
    VectorCursor<Integer> cursor = record.getConceptVector().getNonzeroCursor();
    if (useBulkImportMode) {
      return false;
    }
    while (cursor.isValid()) {
      ConceptToConceptVectorRecordIndexEntry entry = get(cursor.dimension());
      if (entry != null && entry.conceptVectorRecordIDs.contains(record.getID())) {
        entry.removeRecordData(record.getID(), cursor.get());
        set(cursor.dimension(), entry);
      }
      cursor.next();
    }
    return true;
  }

  /**
   * Returns an iterator over all entries in the store. The iterator holds a database
   * cursor that is released once the iteration has run to its end (or failed), so
   * callers should iterate until {@code hasNext()} returns {@code false}.
   *
   * @return a new iterator positioned before the first entry
   */
  public Iterator<ConceptToConceptVectorRecordIndexEntry> getIterator() {
    return new ConceptToConceptVectorIndexIterator();
  }

  /**
   * Looks up the entry for a concept id, first in the cache and then via {@code fetch}.
   *
   * @param key the concept id
   * @return the entry, or {@code null} in bulk import mode; otherwise whatever
   *         {@code fetch} yields for keys missing from the cache
   */
  @Override
  public ConceptToConceptVectorRecordIndexEntry get(Integer key) {
    if (useBulkImportMode) {
      return null;
    }
    ConceptToConceptVectorRecordIndexEntry cached = getFromCache(key);
    return cached != null ? cached : fetch(key);
  }

  /**
   * Reports the size of the index, taken from the {@code numLeafNodes} figure in the
   * textual database statistics. This is a slow operation.
   *
   * @return {@code -1} in bulk import mode; otherwise the parsed count, or {@code 0}
   *         when the statistics could not be read or do not contain the figure
   */
  @Override
  public int size() {
    if (useBulkImportMode) {
      return -1;
    }
    int size = 0;
    try {
      DatabaseStats stats = conceptToConceptVectorIndexStore.getStats(null);
      Matcher m = NUM_LEAF_NODES_PATTERN.matcher(stats.toString());
      if (m.find()) {
        size = Integer.parseInt(m.group(1));
      }
    } catch (DatabaseException e) {
      e.printStackTrace();
    }
    return size;
  }

  /**
   * Reads a single entry directly from the store.
   *
   * @param id the concept id
   * @return the stored entry, or {@code null} if there is none or the read failed
   */
  @Override
  protected ConceptToConceptVectorRecordIndexEntry getEntryFromStoreWithID(Integer id) {
    ConceptToConceptVectorRecordIndexEntry conceptToRecordIndexEntry = null;
    try {
      DatabaseEntry databaseKey = new DatabaseEntry();
      DatabaseEntry databaseValue = new DatabaseEntry();
      myIntegerBinding.objectToEntry(id, databaseKey);
      OperationStatus status =
          conceptToConceptVectorIndexStore.get(null, databaseKey, databaseValue, LockMode.DEFAULT);
      // Only decode when the lookup actually succeeded and produced data.
      if (status == OperationStatus.SUCCESS && databaseValue.getSize() != 0) {
        conceptToRecordIndexEntry =
            (ConceptToConceptVectorRecordIndexEntry) myDataBinding.entryToObject(databaseValue);
      }
    } catch (DatabaseException e) {
      e.printStackTrace();
    }
    return conceptToRecordIndexEntry;
  }

  /**
   * Bulk lookup is not implemented for this index.
   *
   * @param ids the concept ids (ignored)
   * @return always {@code null}
   */
  @Override
  protected Map<Integer, ConceptToConceptVectorRecordIndexEntry> getEntriesFromStoreWithIDs(Collection<Integer> ids) {
    // NOTE(review): still a stub; how the base class treats a null result is not visible here.
    return null;
  }

  /**
   * Writes a single entry directly to the store, without a transaction.
   *
   * @param id the concept id used as key
   * @param value the entry to store
   */
  @Override
  protected void setEntryInStore(Integer id, ConceptToConceptVectorRecordIndexEntry value) {
    try {
      DatabaseEntry databaseKey = new DatabaseEntry();
      myIntegerBinding.objectToEntry(id, databaseKey);
      DatabaseEntry databaseValue = new DatabaseEntry();
      myDataBinding.objectToEntry(value, databaseValue);
      conceptToConceptVectorIndexStore.put(null, databaseKey, databaseValue);
    } catch (DatabaseException e) {
      e.printStackTrace();
    }
  }

  /** Closes the given cursor if it is non-null, printing (not propagating) any failure. */
  private static void closeCursorQuietly(Cursor cursor) {
    if (cursor == null) {
      return;
    }
    try {
      cursor.close();
    } catch (DatabaseException e) {
      e.printStackTrace();
    }
  }

  /**
   * Cursor-backed iterator over all entries of the store. It reads one entry ahead; each
   * returned entry has its {@code key} field set to the key it was stored under.
   */
  protected class ConceptToConceptVectorIndexIterator implements Iterator<ConceptToConceptVectorRecordIndexEntry> {
    /** Open cursor, or {@code null} once it has been released or could not be opened. */
    Cursor myCursor;
    DatabaseEntry foundKey = new DatabaseEntry();
    DatabaseEntry foundData = new DatabaseEntry();
    /** The entry the next call to {@link #next()} will return; {@code null} at the end. */
    ConceptToConceptVectorRecordIndexEntry next = null;

    /** Opens a cursor on the store and reads the first entry ahead. */
    public ConceptToConceptVectorIndexIterator() {
      try {
        myCursor = conceptToConceptVectorIndexStore.openCursor(null, null);
      } catch (DatabaseException e) {
        e.printStackTrace();
      }
      readNext();
    }

    /** Advances the read-ahead entry; releases the cursor at the end or on failure. */
    private void readNext() {
      if (myCursor == null) {
        // Nothing (more) to read: the cursor was never opened or is already released.
        next = null;
        return;
      }
      try {
        if (myCursor.getNext(foundKey, foundData, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
          Integer key = (Integer) myIntegerBinding.entryToObject(foundKey);
          next = (ConceptToConceptVectorRecordIndexEntry) myDataBinding.entryToObject(foundData);
          next.key = key;
        } else {
          next = null;
          releaseCursor();
        }
      } catch (DatabaseException e) {
        e.printStackTrace();
        // End the iteration instead of handing out the previous entry over and over.
        next = null;
        releaseCursor();
      }
    }

    /** Closes the cursor and forgets it, so it is never used after being closed. */
    private void releaseCursor() {
      closeCursorQuietly(myCursor);
      myCursor = null;
    }

    @Override
    public boolean hasNext() {
      return next != null;
    }

    /**
     * Returns the read-ahead entry and advances.
     *
     * @return the next entry, or {@code null} when the iteration is exhausted
     */
    @Override
    public ConceptToConceptVectorRecordIndexEntry next() {
      ConceptToConceptVectorRecordIndexEntry result = next;
      readNext();
      return result;
    }

    /** Not implemented; only prints a message and removes nothing. */
    @Override
    public void remove() {
      System.out.println("Remove is not implemented for ConceptToConceptVectorIndexIterator iterator!");
    }
  }
}