/* * Concept profile generation tool suite * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center, * Rotterdam, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package org.erasmusmc.dataimport.UMLS; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; import org.erasmusmc.ontology.Concept; import org.erasmusmc.ontology.DefaultTypes; import org.erasmusmc.ontology.Ontology; import org.erasmusmc.ontology.Relation; import org.erasmusmc.utilities.ReadTextFile; import org.erasmusmc.utilities.StringUtilities; public class MRRELLoader { static public boolean useBroaderThenRelations = false; static public boolean filterVocabularies = true; static public Set<String> vocFilter = loadVocFilter(); static int cuiFromCol = 4; static int cuiToCol = 0; static int relIDCol = 3; static String parentRelationID = "PAR"; static String broaderthenRelationID = "RB"; public static void addParentRelations(Ontology ontology, String mrrelFile) { ReadTextFile mrrelHandle = new ReadTextFile(mrrelFile); Iterator<String> it = mrrelHandle.iterator(); int count = 0; while (it.hasNext()) { String line = it.next(); if (line.length() != 0) { List<String> columns = StringUtilities.safeSplit(line, '|'); String relID = columns.get(relIDCol).trim(); if (relID.equalsIgnoreCase(parentRelationID) || (useBroaderThenRelations && relID.equalsIgnoreCase(broaderthenRelationID))) { if (!filterVocabularies || vocFilter.contains(columns.get(10).trim())) { Integer cuiFrom = Integer.parseInt(columns.get(cuiFromCol).trim().substring(1, columns.get(cuiFromCol).length())); Integer cuiTo = Integer.parseInt(columns.get(cuiToCol).trim().substring(1, columns.get(cuiToCol).length())); Concept cFrom = ontology.getConcept(cuiFrom); Concept cTo = ontology.getConcept(cuiTo); if (cFrom != null && cTo != null && cFrom != cTo) { Relation relation = new Relation(cuiFrom, DefaultTypes.isParentOf, cuiTo); ontology.setRelation(relation); //System.out.println( relation.toString()); count++; } } } } } System.out.println("Added " + count + " relations to " + ontology.getName()); } private static Set<String> loadVocFilter() { Set<String> vocs = new HashSet<String>(); // alle vocs staan aangegeven hier, heel vies selecteren via aan en uitvinken. // vocs.add("HCPCS"); // vocs.add("CTCAE"); // vocs.add("ICD9CM"); // vocs.add("CSP"); // vocs.add("AIR"); // vocs.add("HL7V2.5"); // vocs.add("USPMG"); // vocs.add("NCBI"); // vocs.add("NCI"); // vocs.add("NDFRT"); // vocs.add("LNC"); // vocs.add("CCS"); // vocs.add("CST"); // vocs.add("AOD"); // vocs.add("AOT"); // vocs.add("PDQ"); vocs.add("MSH"); vocs.add("GO"); // vocs.add("HL7V3.0"); // vocs.add("UWDA"); // vocs.add("ICPC"); // vocs.add("PNDS"); return vocs; } }