/*
 * Concept profile generation tool suite
 * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
 *  Rotterdam, The Netherlands
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>
 */

package JochemBuilder.KEGGcompound;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.erasmusmc.ids.DatabaseID;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.ontology.DefaultTypes;
import org.erasmusmc.ontology.OntologyStore;
import org.erasmusmc.ontology.Relation;
import org.erasmusmc.ontology.TermStore;
import org.erasmusmc.ontology.ontologyutilities.OntologyUtilities;
import org.erasmusmc.utilities.ReadTextFile;
import org.erasmusmc.utilities.WriteTextFile;

/**
 * Builds an {@link OntologyStore} of chemical concepts from a flat file made of
 * records that use the keywords {@code ENTRY}, {@code NAME}, {@code FORMULA},
 * {@code REMARK} and {@code DBLINKS} and are terminated by a {@code ///} line.
 *
 * <p>Every record that yields at least one term becomes one concept that is linked to
 * the general and the specific chemical vocabulary and to the chemical semantic type.
 */
public class ChemicalsFromKEGGcompound {

  /** Concept id and name of the general chemical vocabulary. */
  public int generalChemicalVocID = -3000;
  public String generalChemicalVocName = "CHEMICAL";

  /** Concept id and name of the source-specific vocabulary. */
  public int specificChemicalVocID = -3005;
  // NOTE(review): "KEGGCOMPUND" looks like a misspelling of "KEGGCOMPOUND". It is kept
  // as is because it is a runtime value other code may match on - confirm before fixing.
  public String specificChemicalVocName = "KEGGCOMPUND";

  /** Concept id and name of the semantic type assigned to every concept. */
  public int umlsSemID = -103;
  public String umlsSemName = "Chemical";

  /**
   * Parses the compound file and returns the resulting ontology.
   *
   * <p>For every record whose {@code REMARK} line contains {@code "Same as:"} and that
   * produced a concept, a line {@code "<id> TO <entry id>"} is written to the mapping
   * file.
   *
   * @param filename path of the compound flat file to read
   * @param mappingfile path of the mapping file to write
   * @return the ontology holding the vocabulary concepts, the semantic type concept and
   *     one concept per record that has at least one term
   */
  public OntologyStore run(String filename, String mappingfile) {
    OntologyStore ontology = new OntologyStore();

    // Set the standard vocabulary and umls semantic type concepts
    addNamedConcept(ontology, generalChemicalVocID, generalChemicalVocName);
    addNamedConcept(ontology, specificChemicalVocID, specificChemicalVocName);
    addNamedConcept(ontology, umlsSemID, umlsSemName);

    WriteTextFile writeFile = new WriteTextFile(mappingfile);
    try {
      ReadTextFile file = new ReadTextFile(filename);
      Iterator<String> iterator = file.getIterator();

      // Per-record state. The lists start out empty (not null) so that a record
      // without a NAME or DBLINKS section cannot trigger a NullPointerException.
      List<TermStore> terms = new ArrayList<TermStore>();
      List<DatabaseID> databaseIds = new ArrayList<DatabaseID>();
      String keggID = "";
      String drugId = "";
      boolean map = false;

      // Section state.
      boolean name = false;
      boolean databaseLinks = false;

      // NAME continuation state: "wait" is set while a name fragment ending in '-' or
      // ',' is pending in prevTerm; "term" is the most recently read name line.
      boolean wait = false;
      String prevTerm = "";
      String term = "";

      int cui = 5000000;
      int lineCount = 0;

      while (iterator.hasNext()) {
        lineCount++;
        if (lineCount % 10000 == 0) {
          System.out.println(lineCount);
        }
        String line = iterator.next();

        // A line that is not indented starts a new section (or ends the record), so any
        // open NAME or DBLINKS section is closed before the line itself is interpreted.
        if (!line.startsWith(" ")) {
          if (name) {
            name = false;
            // A fragment is still pending: complete it with the last name line read,
            // unless that line is the fragment itself.
            if (wait && prevTerm.length() != 0 && !term.contains("Transferred to")) {
              wait = false;
              if (!term.equals(prevTerm)) {
                term = prevTerm + term;
              }
              terms.add(new TermStore(term));
            }
          }
          databaseLinks = false;
        }

        // Text handed to the NAME / DBLINKS section handlers below. Only those two
        // headers carry section content on the header line itself.
        String payload = line;

        if (line.startsWith("NAME")) {
          name = true;
          wait = false;
          term = "";
          prevTerm = "";
          terms = new ArrayList<TermStore>();
          payload = line.substring("NAME".length());
        } else if (line.startsWith("ENTRY")) {
          keggID = line.substring("ENTRY".length()).trim().split(" ")[0].trim();
        } else if (line.startsWith("REMARK")) {
          String remark = line.substring("REMARK".length());
          if (remark.indexOf("Same as:") != -1) {
            String[] mapping = remark.split(":");
            // split() drops trailing empty strings, so "Same as:" without an id
            // yields a single element.
            if (mapping.length > 1) {
              map = true;
              drugId = mapping[1].trim();
            }
          }
        } else if (line.startsWith("FORMULA")) {
          String formula = line.substring("FORMULA".length()).trim().split(" ")[0].trim();
          if (formula.length() != 0) {
            terms.add(new TermStore(formula));
          }
        } else if (line.startsWith("DBLINKS")) {
          databaseLinks = true;
          databaseIds = new ArrayList<DatabaseID>();
          payload = line.substring("DBLINKS".length());
        }

        if (databaseLinks) {
          if (payload.startsWith(" ")) {
            addDatabaseLink(payload.trim(), databaseIds);
          } else {
            // DBLINKS header without content on the same line.
            databaseLinks = false;
          }
        }

        if (name) {
          if (payload.startsWith(" ")) {
            term = payload.trim();
            if (term.endsWith("-") || term.endsWith(",")) {
              // Name fragment; the rest is expected on a following line.
              // NOTE(review): only the most recent fragment is kept, so a name spread
              // over three or more lines loses its first part - confirm against data.
              wait = true;
              prevTerm = term;
            } else if (term.endsWith(";")) {
              // ';' terminates a name; prepend a pending fragment if there is one.
              term = term.substring(0, term.length() - 1);
              if (wait) {
                wait = false;
                term = prevTerm + term;
              }
              if (!term.contains("Transferred to")) {
                terms.add(new TermStore(term));
              }
            } else if (!wait) {
              if (!term.contains("Transferred to")) {
                terms.add(new TermStore(term));
              }
            }
            // Otherwise a fragment is pending and this line completes it once the
            // NAME section is closed (see the top of the loop).
          } else {
            // NAME header without content on the same line.
            name = false;
          }
        }

        // End of record. This check is independent of the NAME state so that a record
        // whose NAME section is directly followed by "///" is still stored.
        if (line.startsWith("///")) {
          if (terms.size() != 0) {
            addCompoundConcept(ontology, cui++, terms, databaseIds, keggID);
            if (map) {
              writeFile.writeln(drugId + " TO " + keggID);
            }
          }
          // Reset all per-record state, including the mapping and continuation flags,
          // so that nothing leaks into the next record.
          databaseIds = new ArrayList<DatabaseID>();
          terms = new ArrayList<TermStore>();
          term = "";
          prevTerm = "";
          keggID = "";
          drugId = "";
          map = false;
          wait = false;
        }
      }
    } finally {
      // Close the mapping file even when parsing fails part-way through.
      writeFile.close();
    }
    return ontology;
  }

  /** Stores a concept that consists of just an id and a name. */
  private static void addNamedConcept(OntologyStore ontology, int id, String conceptName) {
    Concept concept = new Concept(id);
    concept.setName(conceptName);
    ontology.setConcept(concept);
  }

  /**
   * Parses one {@code "<database>: <id>"} entry and records it when the database is
   * CAS, PubChem (stored as "PUBS") or ChEBI (stored as "CHEB"). Entries without an id
   * after the colon are ignored.
   */
  private static void addDatabaseLink(String entry, List<DatabaseID> databaseIds) {
    String[] parts = entry.split(":");
    if (parts.length < 2) {
      return;
    }
    String dbName = parts[0].trim();
    String id = parts[1].trim();
    if (dbName.equals("CAS")) {
      databaseIds.add(new DatabaseID(dbName, id));
    } else if (dbName.equals("PubChem")) {
      databaseIds.add(new DatabaseID("PUBS", id));
    } else if (dbName.equals("ChEBI")) {
      databaseIds.add(new DatabaseID("CHEB", id));
    }
  }

  /** Replaces the double quotation mark by a single one in terms that contain exactly one. */
  private static void replaceLoneDoubleQuotes(List<TermStore> terms) {
    for (TermStore termToCheck : terms) {
      String text = termToCheck.text;
      int first = text.indexOf('"');
      if (first != -1 && first == text.lastIndexOf('"')) {
        termToCheck.text = text.replace('"', '\'');
      }
    }
  }

  /**
   * Creates the concept for one record, attaches its terms and database ids and links
   * it to both vocabularies and the semantic type.
   */
  private void addCompoundConcept(OntologyStore ontology, int conceptId, List<TermStore> terms,
      List<DatabaseID> databaseIds, String keggID) {
    Concept concept = new Concept(conceptId);

    replaceLoneDoubleQuotes(terms);

    // Remove duplicates before handing the list to the concept, so the result does not
    // depend on whether setTerms keeps a reference to the list or copies it.
    OntologyUtilities.removeDuplicateTerms(terms);
    concept.setTerms(terms);

    // Set databaseIDs
    databaseIds.add(new DatabaseID("KEGG", keggID));
    for (DatabaseID databaseId : databaseIds) {
      ontology.setDatabaseIDForConcept(concept.getID(), databaseId);
    }

    // Set concept
    ontology.setConcept(concept);

    // Set vocabularies and standard semantic type
    ontology.setRelation(
        new Relation(concept.getID(), DefaultTypes.fromVocabulary, generalChemicalVocID));
    ontology.setRelation(
        new Relation(concept.getID(), DefaultTypes.fromVocabulary, specificChemicalVocID));
    ontology.setRelation(
        new Relation(concept.getID(), DefaultTypes.isOfSemanticType, umlsSemID));
  }
}