/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.dataimport.UMLS;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.erasmusmc.ids.DatabaseID;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.ontology.DefaultTypes;
import org.erasmusmc.ontology.Ontology;
import org.erasmusmc.ontology.Relation;
import org.erasmusmc.ontology.TermStore;
import org.erasmusmc.utilities.ReadTextFile;
/**
 * Loads concepts, their terms, GO database identifiers and vocabulary
 * relations from a rewritten, pipe-delimited MRCONSO file into an
 * {@link Ontology}.
 */
public class RewrittenMRCONSOLoader {

  /** Column with the concept identifier: one prefix character followed by an integer. */
  private static final int CUI_COLUMN = 0;

  /** Column with the term identifier: one prefix character followed by the identifier. */
  private static final int TERM_ID_COLUMN = 5;

  /** Column with the name of the source vocabulary. */
  private static final int VOCABULARY_COLUMN = 11;

  /** Column read as the GO identifier when the vocabulary is {@code "GO"}. */
  private static final int GO_COLUMN = 13;

  /** Column with the term text. */
  private static final int TERM_COLUMN = 14;

  /** Vocabulary name for which the GO identifier column is stored as a database ID. */
  private static final String GO_VOCABULARY = "GO";

  /** ID given to the first vocabulary concept; later vocabularies count downwards from here. */
  private static final int FIRST_VOCABULARY_ID = -1000;

  /** A progress counter is printed to standard output every this many lines. */
  private static final int PROGRESS_INTERVAL = 100000;

  /**
   * Reads the given file line by line and fills the ontology.
   *
   * <p>Rows are grouped into concepts by consecutive equal concept identifiers,
   * so rows belonging to one concept are expected to be adjacent in the file.
   * Within a concept, a row only contributes a term when its term identifier
   * differs from that of the previous row. Each concept is named after its
   * first term and stored in the ontology once all of its rows have been read.
   * Every distinct vocabulary name is stored as its own concept with a negative
   * ID (-1000, -1001, ...), and a {@code fromVocabulary} relation links a
   * concept to each vocabulary that contributed a term.
   *
   * @param ontology the ontology that receives concepts, relations and database IDs
   * @param filename path of the rewritten MRCONSO file; empty lines are skipped
   * @param log_output list to which a message is appended for every term that the
   *        lower-case filter changed
   * @param abbreviationsFile file from which the abbreviations and acronyms are
   *        read that are passed to the lower-case filter
   * @throws NumberFormatException if a concept identifier is not an integer after
   *         its prefix character
   * @throws IndexOutOfBoundsException if a line has fewer columns than required or
   *         an identifier column is empty
   */
  public static void loadFromRewrittenMRCONSO(Ontology ontology, String filename, List<String> log_output, String abbreviationsFile) {
    int prevCui = -1;
    String prevSui = "";
    Map<String, Integer> vocabularies = new HashMap<String, Integer>();
    Set<Integer> foundVocsForConcept = null;
    Concept concept = null;

    // Get list of abbreviations and acronyms
    List<String> listOfAbbreviationsOrAcronyms = UMLSFiltersBeforeOntologyCreation.getAbbreviationsAndAcronyms(abbreviationsFile);

    ReadTextFile textFile = new ReadTextFile(filename);
    Iterator<String> fileIterator = textFile.getIterator();
    int lineCount = 0;
    while (fileIterator.hasNext()) {
      lineCount++;
      if (lineCount % PROGRESS_INTERVAL == 0) {
        System.out.println(lineCount);
      }
      String line = fileIterator.next();
      if (line.length() == 0) {
        continue;
      }
      String[] columns = line.split("\\|");

      // Both identifiers carry a one-character prefix that is dropped.
      int cui = Integer.parseInt(stripIdentifierPrefix(columns[CUI_COLUMN]));
      String sui = stripIdentifierPrefix(columns[TERM_ID_COLUMN]);

      // A new concept identifier: store the finished concept and start a new one.
      if (prevCui != cui) {
        storeConcept(ontology, concept);
        concept = new Concept(cui);
        foundVocsForConcept = new TreeSet<Integer>();
        prevCui = cui;
        // The term identifier is only compared with the previous row. Reset it
        // here so that the first term of this concept is never dropped because
        // the previous concept happened to end with the same term identifier.
        prevSui = "";
      }

      // Add GO-identifier
      String voc = columns[VOCABULARY_COLUMN].trim();
      if (voc.equals(GO_VOCABULARY)) {
        String goIdentifier = columns[GO_COLUMN].trim();
        ontology.setDatabaseIDForConcept(concept.getID(), new DatabaseID(GO_VOCABULARY, goIdentifier));
      }

      // Same term identifier as the previous row of this concept: nothing to add.
      // NOTE(review): such rows also do not contribute a vocabulary relation,
      // even when they come from a different vocabulary - confirm this is intended.
      if (prevSui.equals(sui)) {
        continue;
      }

      // If the term is not an abbreviation or acronym, it is converted to lower
      // case when the filter decides so; every change is reported in the log.
      String term = columns[TERM_COLUMN].trim();
      String checkedTerm = UMLSFiltersBeforeOntologyCreation.convertToLowerCaseIfWordsMoreThan2AndCharactersMoreThan10AndNotAbbreviationOrAcronym(term, voc, listOfAbbreviationsOrAcronyms);
      if (!checkedTerm.equals(term)) {
        log_output.add("TERM HAS BEEN CONVERTED TO LOWERCASE IN ONTOLOGY|" + line);
      }

      List<TermStore> terms = concept.getTerms();
      terms.add(new TermStore(checkedTerm));
      concept.setTerms(terms);
      prevSui = sui;

      // Register the vocabulary as a concept the first time its name is seen.
      Integer vocID = vocabularies.get(voc);
      if (vocID == null) {
        vocID = FIRST_VOCABULARY_ID - vocabularies.size();
        vocabularies.put(voc, vocID);
        Concept vocabulary = new Concept(vocID);
        vocabulary.setName(voc);
        ontology.setConcept(vocabulary);
      }

      // Set the vocabulary if it has not been set before for the concept
      if (!foundVocsForConcept.contains(vocID)) {
        ontology.setRelation(new Relation(concept.getID(), DefaultTypes.fromVocabulary, vocID));
        foundVocsForConcept.add(vocID);
      }
    }

    // Store the concept that was still being built when the file ended.
    storeConcept(ontology, concept);
  }

  /**
   * Trims the column and removes its first character (the identifier prefix).
   * The end of the result is taken from the trimmed text, so surrounding
   * whitespace in the column does not cause an out-of-range index.
   */
  private static String stripIdentifierPrefix(String column) {
    return column.trim().substring(1);
  }

  /**
   * Names the concept after its first term, if it has any, and stores it in
   * the ontology. Does nothing when no concept has been started yet.
   */
  private static void storeConcept(Ontology ontology, Concept concept) {
    if (concept == null) {
      return;
    }
    List<TermStore> terms = concept.getTerms();
    if (!terms.isEmpty()) {
      concept.setName(terms.get(0).text);
    }
    ontology.setConcept(concept);
  }
}