/* * Concept profile generation tool suite * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center, * Rotterdam, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package org.erasmusmc.dataimport.UMLS; import java.util.ArrayList; import java.util.List; import org.erasmusmc.ontology.Ontology; import org.erasmusmc.ontology.OntologyFileLoader; import org.erasmusmc.ontology.OntologyManager; import org.erasmusmc.ontology.OntologyStore; import org.erasmusmc.ontology.ontologyutilities.HomonymAnalyzer; import org.erasmusmc.ontology.ontologyutilities.OntologyCurator; import org.erasmusmc.ontology.ontologyutilities.OntologyUtilities; import org.erasmusmc.ontology.ontologyutilities.evaluationScripts.OntologyFrequencyCount; import org.erasmusmc.utilities.StringUtilities; import org.erasmusmc.utilities.TextFileUtilities; public class UMLSloader { public static boolean generateCurationInformation = true; public static String normaliserCacheFile = "/home/public/Peregrine/standardNormCache2006.bin"; public static String randomPMIDSampleFile = "/home/public/PMIDs/Random100.000.PMIDs"; public static String tempFolder = "/home/khettne/temp/"; public static String version = "2010AB"; public static String timestamp = "180211"; //This file will contain the rewritten terms public static String rewrittenMRCONSOfile = "/home/khettne/Projects/UMLS/"+version+"/MRCONSO"+version+"_"+timestamp+"_rewrittenAndSuppressed_test.RRF"; //Rewrite and suppress log public static String rewriteAndSuppressLog = "/home/khettne/Projects/UMLS/"+version+"/UMLS"+version+"_"+timestamp+"_rewriting_log_test.log"; // This file contains concept IDs, terms, and vocabularies public static String MRCONSOfile = "/home/public/thesauri/UMLS"+version+"/META/MRCONSO.RRF"; //This file contains semantic types for a concept public static String MRSTYfile = "/home/public/thesauri/UMLS"+version+"/META/MRSTY.RRF"; public static String SRDEFfile = "/home/public/thesauri/UMLS"+version+"/NET/SRDEF"; // This file contains abbreviations public static String LRABRfile = "/home/public/thesauri/UMLS"+version+"/LEX/LRABR"; //This file contains the concept definitions public static String MRDEFfile = "/home/public/thesauri/UMLS"+version+"/META/MRDEF.RRF"; //Create loading log public static List<String> log_output = new ArrayList<String>(); public static String logname = "/home/khettne/Projects/UMLS/"+version+"/UMLS"+version+"_loading_log_test.log"; //Curation file public static String curationFilePath = "/home/khettne/Projects/UMLS/"+version+"/UMLS_curation_file_updatedFor"+version+".txt"; //Name of the ontology public static String ontologyName = "UMLS"+version+"_"+timestamp; public static String ontologyPath = "/home/khettne/Projects/UMLS/"+version+"/UMLS"+version+"_"+timestamp+".ontology"; public static void main(String[] args) { System.out.println("Starting script: "+StringUtilities.now()); Ontology newOntology = new OntologyStore(); newOntology.setName(ontologyName); //Filter the UMLS System.out.println("Rewriting and suppressing MRCONSO... "+StringUtilities.now()); RewriteAndSuppressUMLSusingCasper rewriteAndSuppress = new RewriteAndSuppressUMLSusingCasper(); rewriteAndSuppress.run(MRCONSOfile, MRSTYfile, rewrittenMRCONSOfile, rewriteAndSuppressLog); //Fill the ontology System.out.println("Executing MRCONSOLoader... "+StringUtilities.now()); RewrittenMRCONSOLoader.loadFromRewrittenMRCONSO(newOntology, rewrittenMRCONSOfile, log_output, LRABRfile); System.out.println("Executing MRSTYLoader... "+StringUtilities.now()); MRSTYLoader.addSemanticType(newOntology, MRSTYfile, SRDEFfile); System.out.println("Executing MRDEFLoader... "+StringUtilities.now()); MRDEFLoader.addDefinition(newOntology, MRDEFfile, log_output); // Save to log System.out.println("Saving to log file "+StringUtilities.now()); TextFileUtilities.saveToFile(log_output, logname); System.out.println("Formatting UMLS database IDs: "+StringUtilities.now()); newOntology = AddUMLSidAsDatabaseID.addUMLSid(newOntology); System.out.println("Mapping ontology: "+StringUtilities.now()); newOntology = MapGoAndOmimToOntology.mapGoAndOMIMFromMRCONSOtoOntology(newOntology, MRCONSOfile); //Curate System.out.println(StringUtilities.now() + "\tApplying generic filter"); OntologyUtilities.filterOntology(newOntology, OntologyUtilities.stopwordsForFiltering); OntologyCurator curator = new OntologyCurator(curationFilePath); curator.curateAndPrepare(newOntology); if (generateCurationInformation) generateCurationInformation(newOntology); // Save ontology OntologyFileLoader loader = new OntologyFileLoader(); loader.save((OntologyStore)newOntology, ontologyPath); // Load ontology into database OntologyManager ontologyManager = new OntologyManager(); ontologyManager.dumpStoreInDatabase((OntologyStore)newOntology); } private static void generateCurationInformation(Ontology newOntology) { System.out.println(StringUtilities.now() + "\tAnalyzing homonyms"); HomonymAnalyzer homcount = new HomonymAnalyzer(); homcount.destroyOntologyDuringRelease = false; homcount.normaliser.loadCacheBinary(normaliserCacheFile); homcount.setOntology(newOntology); homcount.countHomonyms(tempFolder+"umls_homonyms.txt"); System.out.println(StringUtilities.now() + "\tCounting frequencies"); OntologyFrequencyCount.disambiguate = false; OntologyFrequencyCount.pmidsFile = randomPMIDSampleFile; OntologyFrequencyCount.outputFile = tempFolder+"umls_frequencyCounts.txt"; new OntologyFrequencyCount(newOntology); } }