/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.dataimport;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.erasmusmc.ids.DatabaseID;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.ontology.OntologyPSFLoader;
import org.erasmusmc.ontology.OntologyStore;
import org.erasmusmc.ontology.TermStore;
import org.erasmusmc.utilities.ReadTextFile;
/**
 * Command-line tool that reads a KEGG flat file, collects the names listed in
 * each record's {@code NAME} section and stores them as concepts in an
 * {@link OntologyStore}, which is then written out as a PSF file.
 *
 * <p>Only two kinds of lines are interpreted: a line starting with
 * {@code ENTRY} supplies the KEGG identifier, and a line starting with
 * {@code NAME} opens a block of names that continues on the following lines
 * as long as they start with a space. Every non-empty block of names becomes
 * one concept linked to the most recently seen KEGG identifier.
 */
public class KEGGimport {

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_SOURCE_PATH = "/data/KEGG/compound";

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_TARGET_PATH =
            "/home/schuemie/TREC2007/thesauri/BiologicalSubstances.psf";

    /** Identifier assigned to the first concept; later concepts count up from here. */
    private static final int FIRST_CONCEPT_ID = 5000000;

    private static final String ENTRY_TAG = "ENTRY";
    private static final String NAME_TAG = "NAME";

    /**
     * Runs the import.
     *
     * @param args optional arguments: {@code args[0]} is the path of the KEGG
     *             file to read and {@code args[1]} the path of the PSF file to
     *             write. Missing arguments (or a {@code null} array) fall back
     *             to the built-in default paths.
     */
    public static void main(String[] args) {
        String sourcePath = (args != null && args.length > 0) ? args[0] : DEFAULT_SOURCE_PATH;
        String targetPath = (args != null && args.length > 1) ? args[1] : DEFAULT_TARGET_PATH;

        OntologyStore ontology = new OntologyStore();
        ReadTextFile file = new ReadTextFile(sourcePath);
        Iterator<String> iterator = file.getIterator();

        boolean inNameBlock = false;
        List<TermStore> terms = null;
        String keggID = "";
        int cui = FIRST_CONCEPT_ID;

        while (iterator.hasNext()) {
            String line = iterator.next();
            boolean startsName = line.startsWith(NAME_TAG);
            boolean startsEntry = !startsName && line.startsWith(ENTRY_TAG);

            // A new NAME or ENTRY line terminates a NAME block that is still
            // open. Store the pending names first, so they are neither dropped
            // nor linked to the identifier of the record that is just starting.
            if (inNameBlock && (startsName || startsEntry)) {
                cui = storeConcept(ontology, terms, keggID, cui);
                inNameBlock = false;
            }

            if (startsEntry) {
                // The identifier is the first space-delimited token after the
                // tag; anything following it on the line is ignored.
                keggID = line.substring(ENTRY_TAG.length()).trim().split(" ")[0].trim();
                continue;
            }

            if (startsName) {
                inNameBlock = true;
                terms = new ArrayList<TermStore>();
                // The remainder of the NAME line holds the first name and is
                // handled below exactly like a continuation line.
                line = line.substring(NAME_TAG.length());
            }

            if (!inNameBlock) {
                continue;
            }

            if (line.startsWith(" ")) {
                addTerm(terms, line);
            } else {
                // First line that is not indented: the NAME block is complete.
                inNameBlock = false;
                cui = storeConcept(ontology, terms, keggID, cui);
            }
        }

        // The input may end while a NAME block is still open; without this
        // final flush the last concept would be lost.
        if (inNameBlock) {
            storeConcept(ontology, terms, keggID, cui);
        }

        OntologyPSFLoader loader = new OntologyPSFLoader();
        loader.ontology = ontology;
        loader.saveToPSF(targetPath);
    }

    /**
     * Extracts the name from one line of a NAME block and appends it to
     * {@code terms}. Surrounding whitespace and a single trailing {@code ;}
     * are removed. Names containing {@code "Transferred to"} and names that
     * are empty after clean-up are skipped.
     */
    private static void addTerm(List<TermStore> terms, String line) {
        String term = line.trim();
        if (term.endsWith(";")) {
            term = term.substring(0, term.length() - 1);
        }
        if (term.length() == 0 || term.contains("Transferred to")) {
            return;
        }
        terms.add(new TermStore(term));
    }

    /**
     * Stores the collected names as a new concept with identifier {@code cui},
     * linked to the given KEGG identifier. Does nothing when there are no names.
     *
     * @return the identifier to use for the next concept: {@code cui + 1} if a
     *         concept was stored, otherwise {@code cui} unchanged
     */
    private static int storeConcept(OntologyStore ontology, List<TermStore> terms,
                                    String keggID, int cui) {
        if (terms == null || terms.isEmpty()) {
            return cui;
        }
        Concept concept = new Concept(cui);
        concept.setTerms(terms);
        ontology.setDatabaseIDForConcept(concept.getID(), new DatabaseID("KEGG", keggID));
        ontology.setConcept(concept);
        return cui + 1;
    }
}