/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.ontology.ontologyConstructors;
import java.util.ArrayList;
import java.util.List;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.ontology.Ontology;
import org.erasmusmc.ontology.OntologyPSFLoader;
import org.erasmusmc.ontology.OntologyStore;
import org.erasmusmc.ontology.TermStore;
import org.erasmusmc.ontology.ontologyutilities.OntologyCurator;
import org.erasmusmc.peregrine.ConceptPeregrine;
import org.erasmusmc.utilities.StringUtilities;
/**
 * Constructs the "UMLS2006Homologene" ontology from a PSF thesaurus file and,
 * when run as a program, loads it into a {@link ConceptPeregrine} instance,
 * reports how long {@code release()} takes and how much the used heap changes,
 * and finally indexes a small test string.
 *
 * <p>All file locations are fixed absolute paths (see the constants below).
 */
public class OCUMLS2006Homologene {

  /** Binary normaliser cache loaded into the Peregrine normaliser by {@link #main}. */
  private static final String NORMALISER_CACHE_FILE = "/home/public/Peregrine/standardNormCache2006.bin";

  /** PSF thesaurus file read by {@link #constructOntology()}. */
  private static final String PSF_FILE = "/home/public/thesauri/UMLS2006Homologene_v1_6c.psf";

  /** Name assigned to the ontology returned by {@link #constructOntology()}. */
  private static final String ONTOLOGY_NAME = "UMLS2006Homologene";

  /**
   * Builds the ontology, hands it to a {@link ConceptPeregrine}, and prints
   * the wall-clock time of {@code release()} plus the change in used heap
   * around that call, followed by the number of concepts found in the
   * string {@code "test"}.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    Ontology ontology = constructOntology();
    ConceptPeregrine peregrine = new ConceptPeregrine();

    System.out.println("Loading normaliser cache. " + StringUtilities.now());
    peregrine.normaliser.loadCacheBinary(NORMALISER_CACHE_FILE);
    peregrine.setOntology(ontology);

    // Request a collection first so the "before" sample is not inflated by
    // garbage left over from loading. System.gc() is only a hint to the JVM.
    System.gc();
    System.out.println("Releasing. " + StringUtilities.now());
    long memBefore = usedMemory();
    long start = System.currentTimeMillis();
    peregrine.release();
    long timeTaken = System.currentTimeMillis() - start;

    // Same reasoning for the "after" sample: collect temporaries created
    // during release() before measuring.
    System.gc();
    // Growth is (after - before). The previous code subtracted the other way
    // round and therefore printed the negated value.
    long memGrowthSize = usedMemory() - memBefore;

    System.out.println(timeTaken + " ms");
    System.out.println(memGrowthSize + " b");

    // Minimal smoke test of the released indexer.
    peregrine.index("test");
    System.out.println(peregrine.resultConcepts.size());
  }

  /**
   * Loads the thesaurus (including definitions) from the PSF file, runs it
   * through {@link OntologyCurator#curateAndPrepare}, and names it
   * {@code "UMLS2006Homologene"}.
   *
   * @return the loaded and curated ontology
   */
  public static OntologyStore constructOntology() {
    System.out.println("Loading thesaurus");
    OntologyPSFLoader loader = new OntologyPSFLoader();
    loader.loadDefinitions = true;
    loader.loadFromPSF(PSF_FILE);

    System.out.println("Preparing thesaurus");
    // Disabled experiment, kept for reference: for concepts with ID > 2000000
    // it added spelling variants of terms ending in a number or roman numeral
    // ("abc 1" <-> "abc-1").
    /*
    for (Concept concept : loader.ontology)
      if (concept.getID() > 2000000){
        List<TermStore> newTerms = new ArrayList<TermStore>();
        for (TermStore term : concept.getTerms()){
          String[] tokens = term.text.split(" ");
          if (tokens.length > 1){
            String lastToken = tokens[tokens.length-1];
            if (StringUtilities.isNumber(lastToken) || StringUtilities.isRomanNumeral(lastToken)){
              TermStore newTerm = new TermStore(term.text.substring(0,term.text.length() - lastToken.length()-1) + "-" + lastToken);
              newTerms.add(newTerm);
            }
          }
          tokens = term.text.split("-");
          if (tokens.length > 1){
            String lastToken = tokens[tokens.length-1];
            if (StringUtilities.isNumber(lastToken) || StringUtilities.isRomanNumeral(lastToken)){
              TermStore newTerm = new TermStore(term.text.substring(0,term.text.length() - lastToken.length()-1) + " " + lastToken);
              newTerms.add(newTerm);
            }
          }
        }
        concept.getTerms().addAll(newTerms);
      }
    */
    OntologyCurator curator = new OntologyCurator();
    curator.curateAndPrepare(loader.ontology);

    // Disabled: override of per-term matching flags after curation.
    //for (Concept concept : loader.ontology)
    //  for (TermStore term : concept.getTerms()){
    //    term.caseSensitive = false;
    //    term.orderSensitive = true;
    //    term.normalised = false;
    //  }
    loader.ontology.setName(ONTOLOGY_NAME);
    return loader.ontology;
  }

  /** Returns the number of heap bytes currently in use (total minus free). */
  private static long usedMemory() {
    Runtime runtime = Runtime.getRuntime();
    return runtime.totalMemory() - runtime.freeMemory();
  }
}