/* * Concept profile generation tool suite * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center, * Rotterdam, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package org.erasmusmc.ontology; import java.io.BufferedWriter; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.erasmusmc.ids.DatabaseID; import org.erasmusmc.ontology.Concept.DisambiguationType; import org.erasmusmc.ontology.ontologyutilities.OntologyCurator; import org.erasmusmc.utilities.ReadTextFile; import org.erasmusmc.utilities.StringUtilities; import org.erasmusmc.utilities.WriteTextFile; public class OntologyFileLoader { public static boolean semanticTypesNegative = true; private boolean loadTermsOnly = false; private String version = "UNKNOWN"; public static void main(String[] args) { String source = null; String target = null; try { if (args.length == 0) { throw new IllegalArgumentException(); } source = args[0]; target = source.replaceAll("\\.psf$", ".ontology"); if (source.equals(target)) { throw new IllegalArgumentException(); } } catch (IllegalArgumentException e) { System.out.println("Usage: some_file.psf"); System.out.println("The output file some_file.ontology will be created."); System.exit(0); } String name = "UMLS"; convertPSFToOntology(source, target, name, 3000000); } public static void convertPSFToOntology(String source, String target, String name, int minConceptID) { OntologyPSFLoader psfLoader = new OntologyPSFLoader(); psfLoader.loadFromPSF(source); OntologyStore ontology = psfLoader.ontology; ontology.setName(name); OntologyCurator curator = new OntologyCurator(); curator.curateAndPrepare(ontology); if (minConceptID != -1) for (Concept concept : ontology) if (concept.getID() > 0) if (concept.getID() < minConceptID) concept.setDisambiguationType(DisambiguationType.loose); else concept.setDisambiguationType(DisambiguationType.strict); OntologyFileLoader loader = new OntologyFileLoader(); loader.save(ontology, target); } public static void convertOntologyFileToPsf(String source, String target, String name) { OntologyFileLoader loader = new OntologyFileLoader(); OntologyStore ontology = loader.load(source); ontology.setName(name); OntologyCurator curator = new OntologyCurator(); curator.curateAndPrepare(ontology); boolean doVoc = true; boolean doSem = true; try { FileOutputStream PSFFile = new FileOutputStream(target); BufferedWriter bufferedWrite = new BufferedWriter(new OutputStreamWriter(PSFFile, "UTF-8"), 1000000); StringBuffer firstline = new StringBuffer(); firstline.append("LEVEL|"); if (doVoc) { firstline.append("VOC|"); } if (doSem) { firstline.append("SEM|"); } firstline.append("DEFAULT|0"); bufferedWrite.write(firstline.toString()); bufferedWrite.newLine(); // Add concepts Iterator<Concept> values = ontology.getConceptIterator(); while (values.hasNext()) { Concept concept = values.next(); StringBuffer line = new StringBuffer(); line.append(0 + "|"); if (concept.ID < 0){ line.append("|"+"|"+concept.getName()); }else { if (doVoc) { line.append(relatedToString(ontology, concept.getID(), DefaultTypes.fromVocabulary) + "|"); } if (doSem) { line.append(semrelatedToString(ontology, concept.getID(), DefaultTypes.isOfSemanticType) + "|"); } if (concept.terms != null) { if (concept.terms.size() != 0) { line.append(StringUtilities.escape(concept.terms.get(0).text)); } for (int j = 1; j < concept.terms.size(); j++) { line.append(";" + StringUtilities.escape(concept.terms.get(j).text)); } } else { line.append(StringUtilities.escape(concept.getName())); } boolean hasDef = false; if (!((concept.definition == null) || concept.definition.equals(""))) { line.append("?" + StringUtilities.escape(concept.definition)); hasDef = true; } // Append the database identifiers (if any) List<DatabaseID> databaseIDs = ontology.getDatabaseIDsForConcept(concept.getID()); if (databaseIDs != null) { boolean first = true; for (DatabaseID databaseID: databaseIDs) { if (first && !hasDef) { first = false; line.append("?"); } else line.append("\\;"); line.append(databaseID.database); line.append("_"); line.append(StringUtilities.escape(databaseID.ID)); } } } line.append("|" + Integer.toString(concept.ID)); bufferedWrite.write(line.toString()); bufferedWrite.newLine(); } // Add hierarchy: Iterator<Concept> conceptIterator = ontology.getConceptIterator(); while (conceptIterator.hasNext()) { Concept concept = conceptIterator.next(); List<Relation> relations = ontology.getRelationsForConceptAsSubject(concept.getID(), DefaultTypes.isParentOf); if (relations != null && relations.size() != 0) { StringBuffer line = new StringBuffer(); line.append("H|"); Iterator<Relation> relationIterator = relations.iterator(); while (relationIterator.hasNext()) { Relation relation = relationIterator.next(); line.append(relation.object); if (relationIterator.hasNext()) line.append(";"); } line.append("|"); line.append(concept.getID()); bufferedWrite.write(line.toString()); bufferedWrite.newLine(); } } bufferedWrite.flush(); } catch (IOException e) { e.printStackTrace(); } } private static String relatedToString(OntologyStore ontology,int conceptID, int relationType) { List<Relation> vocs = ontology.getRelationsForConceptAsSubject(conceptID, relationType); StringBuffer buffer = new StringBuffer(); if (vocs.size() != 0) buffer.append(ontology.getConcept(vocs.get(0).object).getName()); for (int i = 1; i < vocs.size(); i++) { buffer.append(";"); buffer.append(ontology.getConcept(vocs.get(i).object).getName()); } return buffer.toString(); } private static String semrelatedToString(OntologyStore ontology,int conceptID, int relationType) { List<Relation> sems = ontology.getRelationsForConceptAsSubject(conceptID, relationType); StringBuffer buffer = new StringBuffer(); if (sems.size() != 0 && ontology.getConcept(sems.get(0).object) == null) System.out.println(sems.get(0).toString()); if (sems.size() != 0) { Integer id = ontology.getConcept(sems.get(0).object).getID(); if (semanticTypesNegative) id = -id; buffer.append(id); } for (int i = 1; i < sems.size(); i++) { buffer.append(";"); Integer id = ontology.getConcept(sems.get(i).object).getID(); if (semanticTypesNegative) id = -id; buffer.append(id); } return buffer.toString(); } public void setLoadTermsOnly(boolean value) { loadTermsOnly = value; } public boolean getLoadTermsOnly() { return loadTermsOnly; } public String getVersion() { return version; } public OntologyStore load(String filename) { File file = new File(filename); if (!file.exists()){ System.err.println("File not found: " + filename); return null; } OntologyStore ontology = new OntologyStore(); Iterator<String> iterator = new ReadTextFile(filename).iterator(); loadHeader(iterator, ontology); loadBody(iterator, ontology); return ontology; } private void loadBody(Iterator<String> iterator, OntologyStore ontology) { Map<String, Integer> vocs = new HashMap<String, Integer>(); Concept concept = new Concept(null); concept.setTerms(new ArrayList<TermStore>()); String nameSpace = null; while (iterator.hasNext()) { String line = iterator.next(); String key = extractKey(line); if (key != null) { if (key.equals("--")) { ((ArrayList<TermStore>) concept.getTerms()).trimToSize(); ((ArrayList<DatabaseID>) ontology.getDatabaseIDsForConcept(concept.getID())).trimToSize(); ((ArrayList<Relation>) ontology.getRelationsForConceptAsSubject(concept.getID())).trimToSize(); ontology.setConcept(concept); concept = new Concept(null); concept.setTerms(new ArrayList<TermStore>()); nameSpace = null; } else { String value = extractValue(line); if (key.equals("NS")) { nameSpace = value; } else if (key.equals("ID")) { if (nameSpace != null && nameSpace.equals("SemType")) { concept.setID(-Integer.parseInt(value)); } else if (nameSpace != null && nameSpace.equals("Voc")) { concept.setID(getVocID(vocs, ontology, value)); concept.setName(value); // Dirty mapping for OntologyStore: store Voc Id in name field } else concept.setID(Integer.parseInt(value)); } else if (key.equals("NA") && !loadTermsOnly) { if (nameSpace != null && nameSpace.equals("Voc")) // Dirty mapping for OntogyStore: store voc name in definition field concept.setDefinition(unescape(value)); else concept.setName(unescape(value)); } else if (key.equals("TM")) { concept.getTerms().add(parseTerm(value)); } else if (key.equals("DF") && !loadTermsOnly) { concept.setDefinition(unescape(value)); } else if (key.equals("DB") && !loadTermsOnly) { ontology.setDatabaseIDForConcept(concept.getID(), DatabaseID.parseString2DatabaseID(value)); } else if (key.equals("ST") && !loadTermsOnly) { int semTypeID = -Integer.parseInt(value); ontology.setRelation(new Relation(concept.getID(), DefaultTypes.isOfSemanticType, semTypeID)); } else if (key.equals("VO") && !loadTermsOnly) { int vocID = getVocID(vocs, ontology, value); ontology.setRelation(new Relation(concept.getID(), DefaultTypes.fromVocabulary, vocID)); } else if (key.equals("PA") && !loadTermsOnly) { int parentID = Integer.parseInt(value); ontology.setRelation(new Relation(parentID, DefaultTypes.isParentOf, concept.getID())); } else if (key.equals("DI")) { if (value.toLowerCase().equals("st")) concept.setDisambiguationType(DisambiguationType.strict); else if (value.toLowerCase().equals("lo")) concept.setDisambiguationType(DisambiguationType.loose); } } } } } private void loadHeader(Iterator<String> iterator, OntologyStore ontology) { iterator.next(); version = iterator.next().substring(3); ontology.setName(iterator.next().substring(3)); iterator.next(); } private int getVocID(Map<String, Integer> vocs, OntologyStore ontology2, String value) { Integer vocID = vocs.get(value); if (vocID == null) { vocID = -1000 - vocs.size(); vocs.put(value, vocID); Concept concept = new Concept(vocID); concept.setName(value); ontology2.setConcept(concept); } return vocID; } private TermStore parseTerm(String value) { String[] cols = value.split("\t@"); TermStore term = new TermStore(cols[0]); term.orderSensitive = true; term.caseSensitive = true; term.normalised = false; if (cols.length == 2) { if (cols[1].contains("ci")) term.caseSensitive = false; if (cols[1].contains("no")) term.normalised = true; } return term; } private String extractValue(String line) { return line.substring(3); } private String extractKey(String line) { if (line.length() < 2) return null; return line.substring(0, 2); } public void save(OntologyStore ontology, String filename) { WriteTextFile out = new WriteTextFile(filename); writeHeader(out, ontology); for (Concept concept : ontology) writeConcept(ontology, concept, out); out.close(); } private void writeHeader(WriteTextFile out, OntologyStore ontology) { out.writeln("# ErasmusMC ontology file"); out.writeln("VR 1.0"); out.writeln("ON " + ontology.getName()); out.writeln("--"); } private void writeConcept(OntologyStore ontology, Concept concept, WriteTextFile out) { if (concept.getID() <= -1000) { // It is a Voc writeAsVoc(ontology, concept, out); } else if (concept.getID() < 0) { // It is a SemType writeAsSemanticType(ontology, concept, out); } else { writeAsRegular(ontology, concept, concept.getID().toString(), out); } } private void writeAsVoc(OntologyStore ontology, Concept concept, WriteTextFile out) { if (concept.getName().length() == 0) { return; } out.writeln("NS Voc"); out.writeln("ID " + concept.getName()); String definition = escape(concept.getDefinition()); if (definition.length() != 0) out.writeln("NA " + definition); out.writeln("--"); } private void writeAsSemanticType(OntologyStore ontology, Concept concept, WriteTextFile out) { out.writeln("NS SemType"); writeAsRegular(ontology, concept, Integer.toString(-concept.getID()), out); } private void writeAsRegular(OntologyStore ontology, Concept concept, String conceptID, WriteTextFile out) { out.writeln("ID " + conceptID); if (concept.getName().length() > 0) { out.writeln("NA " + escape(concept.getName())); } for (TermStore term : concept.getTerms()) out.writeln("TM " + escape(term.text) + termFlags(term)); if (concept.getDefinition().length() != 0) out.writeln("DF " + escape(concept.getDefinition())); for (DatabaseID dbID : ontology.getDatabaseIDsForConcept(concept.getID())) out.writeln("DB " + dbID.toString()); for (Relation relation : ontology.getRelationsForConceptAsSubject(concept.getID(), DefaultTypes.isOfSemanticType)) out.writeln("ST " + (-relation.object)); for (Relation relation : ontology.getRelationsForConceptAsSubject(concept.getID(), DefaultTypes.fromVocabulary)) out.writeln("VO " + escape(ontology.getConcept(relation.object).getName())); for (Relation relation : ontology.getRelationsForConceptAsObject(concept.getID(), DefaultTypes.isParentOf)) out.writeln("PA " + relation.subject); if (concept.getDisambiguationType() == DisambiguationType.loose) out.writeln("DI lo"); else if (concept.getDisambiguationType() == DisambiguationType.strict) out.writeln("DI st"); out.writeln("--"); } private String termFlags(TermStore term) { StringBuilder sb = new StringBuilder(); if (!term.caseSensitive) sb.append("\t@match=ci"); if (term.normalised) if (sb.length() == 0) sb.append("\t@match=no"); else sb.append(",no"); return sb.toString(); } private String escape(String text) { return text.replace("\t", "\\t").replace("\n", "\\n"); } private String unescape(String text) { return text.replace("\\t", "\t").replace("\\n", "\n"); } }