/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package JochemBuilder.umlsChem;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.erasmusmc.ids.DatabaseID;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.ontology.DefaultTypes;
import org.erasmusmc.ontology.Ontology;
import org.erasmusmc.ontology.OntologyFileLoader;
import org.erasmusmc.ontology.OntologyStore;
import org.erasmusmc.ontology.Relation;
import org.erasmusmc.utilities.StringUtilities;
/**
 * Filters an existing ontology file on semantic type and writes the result to a new
 * ontology file.
 *
 * <p>Every concept with a positive ID that has none of the semantic types listed by
 * {@link #undesiredSemanticType()} is copied to the new ontology. Each copied concept is
 * linked to the two vocabulary concepts ({@link #generalChemicalVocID},
 * {@link #specificChemicalVocID}) and to the semantic type concept ({@link #umlsSemID}),
 * and its database IDs are carried over.
 */
public class UMLSchemFilterForSemanticTypeAfterOntologyCreation {

    // Concept IDs of the semantic types that disqualify a concept from the filtered
    // ontology; all of them are added to the set built by undesiredSemanticType().
    // Presumably these are negated UMLS semantic type numbers (e.g. T116 -> -116);
    // verify against the code that builds the input ontology.
    static int aminoacidPeptideOrProtein = -116;
    static int enzyme = -126;
    static int receptor = -192;
    //static int immunologicFactor = -129;
    static int chemicalViewedFunctionally = -120;
    //static int chemicalViewedStructually = -104;
    static int biomedOrDentalMaterial = -122;
    static int virus = -5;
    static int plant = -2;
    // NOTE(review): same value as umlsSemID below, so any concept that already carries
    // semantic type -103 in the input ontology is filtered out. Confirm this is intended.
    static int chemical = -103;
    static int food = -168;
    static int cell = -25;
    static int geneOrGenome = -28;
    static int spatialConcept = -82;
    static int environmentalEffectOfHumans = -69;
    static int bodySubstance = -31;
    static int clinicalDrug = -200;
    static int medicalDevice = -74;
    static int cellComponent = -26;
    static int nucleotideSequence = -86;
    static int biomedicalOccupationOrdiscipline = -91;
    static int manufacturedObject = -73;
    static int bodyPartOrganOrOrganComponent = -23;
    static int aminoAcidSequence = -87;
    static int classification = -185;
    static int drugDeliveryDevice = -203;
    static int tissue = -24;
    static int bacterium = -7;
    static int fungus = -4;
    static int molecularFunction = -44;

    /** ID and name of the general vocabulary concept every kept concept is linked to. */
    public static int generalChemicalVocID = -3000;
    public static String generalChemicalVocName = "CHEMICAL";

    /** ID and name of the specific vocabulary concept every kept concept is linked to. */
    public static int specificChemicalVocID = -3009;
    public static String specificChemicalVocName = "UMLSCHEM";

    /** ID and name of the semantic type concept assigned to every kept concept. */
    public static int umlsSemID = -103;
    public static String umlsSemName = "Chemical";

    /**
     * Loads the ontology stored in {@code nameOfOldOntologyFile}, keeps the concepts that
     * pass the semantic type filter and saves them to {@code nameOfNewOntologyFile}.
     *
     * <p>Progress (the number of concepts visited) is printed to standard output every
     * 10000 concepts.
     *
     * @param nameOfOldOntologyFile ontology file to read
     * @param nameOfNewOntologyFile ontology file to write
     */
    public void run(String nameOfOldOntologyFile, String nameOfNewOntologyFile) {
        System.out.println("Starting script: " + StringUtilities.now());
        Map<Integer, String> semanticTypes = new HashMap<Integer, String>();
        Ontology filteredOntology = new OntologyStore();

        // Set the standard vocabulary and umls semantic type concepts
        Concept generalVocabulary = new Concept(generalChemicalVocID);
        generalVocabulary.setName(generalChemicalVocName);
        filteredOntology.setConcept(generalVocabulary);

        Concept specificVocabulary = new Concept(specificChemicalVocID);
        specificVocabulary.setName(specificChemicalVocName);
        filteredOntology.setConcept(specificVocabulary);

        Concept semantictype = new Concept(umlsSemID);
        semantictype.setName(umlsSemName);
        filteredOntology.setConcept(semantictype);

        Set<Integer> includedCUIs = new HashSet<Integer>();
        OntologyFileLoader filteredLoader = new OntologyFileLoader();
        OntologyFileLoader loader = new OntologyFileLoader();
        OntologyStore ontology = loader.load(nameOfOldOntologyFile);

        // Build the exclusion set once; it does not change while the ontology is scanned.
        Set<Integer> undesiredTypes = undesiredSemanticType();

        Iterator<Concept> conceptIterator = ontology.getConceptIterator();
        int lineCount = 0;
        while (conceptIterator.hasNext()) {
            lineCount++;
            if (lineCount % 10000 == 0) {
                System.out.println(lineCount);
            }
            Concept concept = conceptIterator.next();
            if (concept.getID() < 0 && concept.getID() > -1000) {
                // IDs in (-1000, 0) are recorded by name; the map is currently not read
                // anywhere else in this class.
                semanticTypes.put(concept.getID(), concept.getName());
            } else if (concept.getID() > 0) {
                List<Relation> semRelations =
                        ontology.getRelationsForConceptAsSubject(concept.getID(), DefaultTypes.isOfSemanticType);
                if (!hasAnySemanticType(semRelations, undesiredTypes)) {
                    includedCUIs.add(concept.getID());
                    filteredOntology.setConcept(concept);

                    // Set vocabularies and standard semantic type
                    Relation generalVocRelation =
                            new Relation(concept.getID(), DefaultTypes.fromVocabulary, generalChemicalVocID);
                    filteredOntology.setRelation(generalVocRelation);
                    Relation specificVocRelation =
                            new Relation(concept.getID(), DefaultTypes.fromVocabulary, specificChemicalVocID);
                    filteredOntology.setRelation(specificVocRelation);
                    Relation semRelation =
                            new Relation(concept.getID(), DefaultTypes.isOfSemanticType, umlsSemID);
                    filteredOntology.setRelation(semRelation);
                }
            }
        }

        // Copy databaseIDs:
        for (int cui : includedCUIs) {
            List<DatabaseID> databaseIDs = ontology.getDatabaseIDsForConcept(cui);
            if (databaseIDs != null) {
                for (DatabaseID databaseID : databaseIDs) {
                    filteredOntology.setDatabaseIDForConcept(cui, databaseID);
                }
            }
        }
        filteredLoader.save((OntologyStore) filteredOntology, nameOfNewOntologyFile);
    }

    /**
     * Tells whether any of the given semantic type relations points at a semantic type
     * listed by {@link #undesiredSemanticType()}.
     *
     * @param semRelations  semantic type relations of one concept; {@code null} is treated
     *                      like an empty list
     * @param semanticTypes not used by this method; kept for interface compatibility
     * @param concept       not used by this method; kept for interface compatibility
     * @return {@code true} if at least one relation's object is an undesired semantic type
     */
    public static boolean conceptHasUndesiredSemanticType(List<Relation> semRelations, Map<Integer, String> semanticTypes, Concept concept) {
        // One set per call instead of one per relation.
        return hasAnySemanticType(semRelations, undesiredSemanticType());
    }

    /** Returns true if the object of any relation in {@code semRelations} is contained in {@code typeIDs}. */
    private static boolean hasAnySemanticType(List<Relation> semRelations, Set<Integer> typeIDs) {
        if (semRelations == null) {
            return false;
        }
        for (Relation semRelation : semRelations) {
            int key = semRelation.object;
            if (typeIDs.contains(key)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds the set of semantic type concept IDs that exclude a concept from the
     * filtered ontology.
     *
     * @return a new, mutable set on every call, filled from the current values of the
     *         static semantic type fields of this class
     */
    public static Set<Integer> undesiredSemanticType() {
        Set<Integer> result = new TreeSet<Integer>();
        result.add(aminoacidPeptideOrProtein);
        result.add(enzyme);
        result.add(receptor);
        // result.add(immunologicFactor);
        result.add(chemicalViewedFunctionally);
        // result.add(chemicalViewedStructually);
        result.add(biomedOrDentalMaterial);
        result.add(virus);
        result.add(plant);
        result.add(chemical);
        result.add(food);
        result.add(cell);
        result.add(geneOrGenome);
        result.add(spatialConcept);
        result.add(environmentalEffectOfHumans);
        result.add(bodySubstance);
        result.add(clinicalDrug);
        result.add(medicalDevice);
        result.add(cellComponent);
        result.add(nucleotideSequence);
        result.add(biomedicalOccupationOrdiscipline);
        result.add(manufacturedObject);
        result.add(bodyPartOrganOrOrganComponent);
        result.add(aminoAcidSequence);
        result.add(classification);
        result.add(drugDeliveryDevice);
        result.add(tissue);
        result.add(bacterium);
        result.add(fungus);
        result.add(molecularFunction);
        return result;
    }
}