/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.ontology.ontologyutilities;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.erasmusmc.ids.DatabaseID;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.ontology.DefaultTypes;
import org.erasmusmc.ontology.Ontology;
import org.erasmusmc.ontology.Relation;
import org.erasmusmc.ontology.TermStore;
import org.erasmusmc.ontology.ontologyutilities.CurationFileParser.DatabaseIDmap;
//import org.erasmusmc.utilities.LVGNormaliser;
import org.erasmusmc.textMining.LVG.LVGNormaliser;
/**
 * Applies manual curation rules to an {@link Ontology} and prepares its terms for
 * matching.
 *
 * <p>When constructed with a curation file, {@link #curateAndPrepare(Ontology)} first
 * removes suppressed concepts, merges mapped concepts, and removes/adds terms keyed by
 * {@link DatabaseID}. In all cases it then sets per-term matching flags and removes
 * duplicate terms per concept, and (with a curation file) removes terms suppressed per
 * vocabulary or across all vocabularies.
 */
public class OntologyCurator {

  private CurationFileParser curationFileParser;
  private NormaliseUsingLVG lvg;

  /**
   * Creates a curator driven by the rules in the given curation file.
   *
   * @param filename path of the curation file parsed by {@link CurationFileParser}
   */
  public OntologyCurator(String filename) {
    curationFileParser = new CurationFileParser(filename);
    lvg = new NormaliseUsingLVG();
  }

  /**
   * Creates a curator without curation rules. {@link #curateAndPrepare(Ontology)} will
   * then only set matching flags and remove duplicate terms.
   */
  public OntologyCurator() {
  }

  /**
   * Curates the given ontology in place (see class comment for the full sequence of
   * operations).
   *
   * @param ontology the ontology to curate; modified in place
   */
  public void curateAndPrepare(Ontology ontology) {
    if (curationFileParser != null) {
      removeConcepts(ontology);
      mapConcepts(ontology);
      removeSuppressedTermsByDatabaseID(ontology);
      addTermsByDatabaseID(ontology);
    }
    for (Concept concept : ontology) {
      boolean ofGeneVoc = OntologyUtilities.hasGeneVoc(concept, ontology);
      boolean ofChemVoc = OntologyUtilities.hasChemVoc(concept, ontology);
      Set<String> previousTerms = new HashSet<String>();
      Iterator<TermStore> termIterator = concept.getTerms().iterator();
      while (termIterator.hasNext()) {
        TermStore term = termIterator.next();
        // Set matching flags: gene/chemical vocabularies use stricter flags.
        if (ofGeneVoc || ofChemVoc) {
          OntologyUtilities.setGeneChemMatchingFlags(term);
        } else {
          OntologyUtilities.setDefaultMatchingFlags(term);
        }
        // Remove duplicate terms: Set.add returns false if the text was seen before.
        if (!previousTerms.add(term.text)) {
          termIterator.remove();
        }
      }
      if (curationFileParser != null) { // Remove suppressed terms:
        removeSuppressedTermsPerVocabulary(ontology, concept);
        removeSuppressedTermsAllVocs(concept);
      }
    }
  }

  /** Adds the curation file's extra terms to every concept carrying the database ID. */
  private void addTermsByDatabaseID(Ontology ontology) {
    for (DatabaseID databaseID : curationFileParser.addedTermsPerDatabaseID.keySet()) {
      Set<Integer> ids = ontology.getConceptIDs(databaseID);
      Set<String> addedTerms = curationFileParser.addedTermsPerDatabaseID.get(databaseID);
      for (Integer id : ids) {
        Concept concept = ontology.getConcept(id);
        for (String term : addedTerms) {
          concept.getTerms().add(new TermStore(term));
        }
      }
    }
  }

  /**
   * Removes from the concept all terms suppressed for any vocabulary the concept
   * belongs to (via its {@code fromVocabulary} relations).
   */
  private void removeSuppressedTermsPerVocabulary(Ontology ontology, Concept concept) {
    List<Relation> vocs =
        ontology.getRelationsForConceptAsSubject(concept.getID(), DefaultTypes.fromVocabulary);
    Set<String> suppressedTerms = new HashSet<String>();
    for (Relation voc : vocs) {
      Set<String> temp =
          curationFileParser.suppressedTermsPerVoc.get(ontology.getConcept(voc.object).getName());
      if (temp != null) {
        suppressedTerms.addAll(temp);
      }
    }
    removeTerms(concept, suppressedTerms);
  }

  /** Removes suppressed terms from every concept carrying the listed database IDs. */
  private void removeSuppressedTermsByDatabaseID(Ontology ontology) {
    for (DatabaseID databaseID : curationFileParser.suppressedTermsPerDatabaseID.keySet()) {
      Set<Integer> ids = ontology.getConceptIDs(databaseID);
      Set<String> suppressedTerms =
          curationFileParser.suppressedTermsPerDatabaseID.get(databaseID);
      for (Integer id : ids) {
        Concept concept = ontology.getConcept(id);
        removeTerms(concept, suppressedTerms);
      }
    }
  }

  /** Removes terms that are suppressed regardless of vocabulary. */
  private void removeSuppressedTermsAllVocs(Concept concept) {
    Set<String> suppressedTerms = curationFileParser.suppressedTermsAllVocs;
    removeTerms(concept, suppressedTerms);
  }

  /**
   * Removes every term whose normalized form (or LVG-normalized form, for non-gene
   * symbols) appears in the suppression set. Each removal is logged to stdout.
   *
   * <p>NOTE(review): {@code lvg} is only initialized by the filename constructor; this
   * method is only reached when {@code curationFileParser != null}, i.e. when that
   * constructor was used, so {@code lvg} is non-null here.
   */
  private void removeTerms(Concept concept, Set<String> suppressedTerms) {
    Iterator<TermStore> termIterator = concept.getTerms().iterator();
    while (termIterator.hasNext()) {
      TermStore term = termIterator.next();
      String normTerm = CurationFileParser.normalizeTerm(term.text);
      String lvgNormTerm = "";
      // Gene symbols are deliberately not LVG-normalized (case/inflection matters).
      if (!OntologyUtilities.isGeneSymbol(term.text)) {
        lvgNormTerm = lvg.lvgnormalise(term.text);
      }
      if (suppressedTerms.contains(normTerm) || suppressedTerms.contains(lvgNormTerm)) {
        termIterator.remove();
        System.out.println(
            concept.getID() + "\t" + concept.getName() + "\t" + term.text + "\t" + normTerm);
      }
    }
  }

  /**
   * Merges every concept carrying the "from" database ID into every concept carrying
   * the "to" database ID, for each mapping in the curation file.
   *
   * @param ontology the ontology to modify in place
   */
  public void mapConcepts(Ontology ontology) {
    for (DatabaseIDmap databaseIDmap : curationFileParser.mappingsFromToDBID) {
      Set<Integer> fromIDs = ontology.getConceptIDs(databaseIDmap.from);
      Set<Integer> toIDs = ontology.getConceptIDs(databaseIDmap.to);
      for (Integer fromID : fromIDs) {
        for (Integer toID : toIDs) {
          // BUG FIX: compare by value, not reference. '!=' on boxed Integers is an
          // identity test and is true for equal IDs outside the Integer cache
          // (-128..127), which would merge a concept with itself.
          if (!fromID.equals(toID)) {
            OntologyUtilities.mergeConcepts(ontology, fromID, toID);
          }
        }
      }
    }
  }

  /** Removes every concept carrying one of the suppressed database IDs. */
  public void removeConcepts(Ontology ontology) {
    for (DatabaseID databaseID : curationFileParser.suppressedWholeUMLSConcepts) {
      Set<Integer> ids = ontology.getConceptIDs(databaseID);
      for (Integer id : ids) {
        ontology.removeConcept(id);
      }
    }
  }

  /**
   * Exposes the protected {@code externalnormalise} of {@link LVGNormaliser}. Declared
   * static: it uses no state of the enclosing instance, so it should not hold a hidden
   * reference to it.
   */
  private static class NormaliseUsingLVG extends LVGNormaliser {
    public String lvgnormalise(String string) {
      return externalnormalise(string);
    }
  }
}