/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.dataimport.tox;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.ontology.DefaultTypes;
import org.erasmusmc.ontology.OntologyManager;
import org.erasmusmc.ontology.OntologyStore;
import org.erasmusmc.ontology.Relation;
import org.erasmusmc.ontology.TermStore;
import org.erasmusmc.ontology.ontologyutilities.OntologyCurator;
import org.erasmusmc.utilities.TextFileUtilities;
/**
 * Builds the toxicology glossary ontology from a plain-text glossary file and
 * stores it in the ontology database.
 *
 * <p>Each line of the glossary file is expected to look like
 * {@code term1;term2;...|definition}. Every line becomes one concept that is
 * linked to a single vocabulary concept ({@link #vocID}) through a
 * {@code DefaultTypes.fromVocabulary} relation. Terms listed in the
 * "terms to remove" resource are left out.
 */
public class MakeToxOntology {
    /** ID of the vocabulary concept every glossary concept is related to. */
    public static int vocID = -1500;

    /** Name given to the vocabulary concept. */
    public static String vocName = "TOX";

    /** Path of the glossary file ({@code term1;term2|definition} per line). */
    public static String thesarusTermsAsText = "/home/khettne/Projects/ToxThesaurus/toxGlossary_withDefinitions.txt";

    /** Name of the class-path resource (resolved relative to this class) listing terms to drop. */
    public static String termsToRemoveFileName = "termsToRemoveToxGlossary.txt";

    /** Name of the ontology that is created; an ontology with this name is deleted first. */
    public static String ontologyName = "toxGlossary_290509";

    /**
     * Command-line entry point; arguments are ignored.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        MakeToxOntology make = new MakeToxOntology();
        make.run();
    }

    /**
     * Reads the glossary, builds the ontology in memory, curates it and
     * replaces the ontology named {@link #ontologyName} in the database.
     */
    public void run() {
        Set<String> termsToRemove = getUndesiredTermsToFilterOut(termsToRemoveFileName);

        OntologyStore ontology = new OntologyStore();
        ontology.setName(ontologyName);

        Concept generalVocabulary = new Concept(vocID);
        generalVocabulary.setName(vocName);
        ontology.setConcept(generalVocabulary);

        int cui = 2900000;
        List<String> terms = TextFileUtilities.loadFromFile(thesarusTermsAsText);
        for (String termline : terms) {
            // Blank lines carry neither terms nor a definition; skip them
            // instead of failing on the missing definition column.
            if (termline.trim().length() == 0) {
                continue;
            }

            // An ID is consumed for every non-blank line, even when all of its
            // terms end up filtered out, so IDs stay tied to the line order.
            Concept concept = new Concept(cui++);
            System.out.println(cui);

            // String.split drops trailing empty strings, so "term|" yields a
            // single element; guard the definition column rather than index blindly.
            String[] parts = termline.split("\\|");
            if (parts.length > 1) {
                String definition = parts[1].trim();
                concept.setDefinition(definition);
            }

            List<TermStore> termStorelist = new ArrayList<TermStore>();
            String[] termlist = parts[0].split(";");
            for (String term : termlist) {
                term = term.trim();
                // The filter set holds lower-cased entries, so the lookup must
                // be lower-cased the same way or mixed-case terms slip through.
                if (term.length() != 0 && !termsToRemove.contains(term.toLowerCase())) {
                    // setName is called for every accepted term, so the last
                    // accepted term is the one left as the concept name.
                    concept.setName(term);
                    termStorelist.add(new TermStore(term));
                }
            }

            // Concepts without any surviving term are not added to the ontology.
            if (!termStorelist.isEmpty()) {
                concept.setTerms(termStorelist);
                ontology.setConcept(concept);
                Relation vocRelation = new Relation(concept.getID(), DefaultTypes.fromVocabulary, vocID);
                ontology.setRelation(vocRelation);
            }
        }

        OntologyCurator curator = new OntologyCurator();
        curator.curateAndPrepare(ontology);

        OntologyManager manager = new OntologyManager();
        manager.deleteOntology(ontologyName);
        manager.dumpStoreInDatabase(ontology);
    }

    /**
     * Loads the list of terms that must be excluded from the ontology.
     *
     * <p>The resource is looked up with {@code MakeToxOntology.class.getResourceAsStream},
     * read line by line, and every line is trimmed and lower-cased before it is
     * added to the result.
     *
     * @param filename name of the class-path resource, resolved relative to this class
     * @return the trimmed, lower-cased lines of the resource
     * @throws IllegalArgumentException if the resource cannot be found
     * @throws IllegalStateException if the resource cannot be read completely;
     *         a partial filter list would silently let unwanted terms through
     */
    public static Set<String> getUndesiredTermsToFilterOut(String filename) {
        java.io.InputStream stream = MakeToxOntology.class.getResourceAsStream(filename);
        if (stream == null) {
            throw new IllegalArgumentException("Resource with terms to remove not found: " + filename);
        }

        Set<String> result = new HashSet<String>();
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream));
        try {
            // readLine() returning null is the reliable end-of-stream signal;
            // ready() only tells whether a read would block.
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                result.add(line.trim().toLowerCase());
            }
        } catch (IOException e) {
            throw new IllegalStateException("Could not read terms to remove from resource: " + filename, e);
        } finally {
            try {
                bufferedReader.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a read-only stream fails.
            }
        }
        return result;
    }
}