/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.dataimport.genes;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.erasmusmc.ids.DatabaseID;
import org.erasmusmc.ontology.OntologyManager;
import org.erasmusmc.ontology.OntologyPSFLoader;
import org.erasmusmc.ontology.OntologyStore;
import org.erasmusmc.utilities.ReadTextFile;
import org.erasmusmc.utilities.StringUtilities;
import org.erasmusmc.utilities.WriteTextFile;
/**
 * Fetches the EMBL identifiers from SwissProt and adds them to the thesaurus.
 * <p>
 * The whole import runs from the constructor: the ontology is fetched from the
 * database, the SwissProt flat file is scanned entry by entry, EMBL
 * cross-references of matching entries are attached to the corresponding
 * concept, and the enriched ontology is written to a PSF file. Input and
 * output locations are hard-coded.
 *
 * @author Schuemie
 *
 */
public class ImportEMBLIdentifiersFromSwissProt {

  /**
   * Command-line entry point; runs the import once.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    new ImportEMBLIdentifiersFromSwissProt();
  }

  /**
   * Runs the complete import: load the ontology, process the SwissProt file,
   * save the ontology.
   */
  public ImportEMBLIdentifiersFromSwissProt() {
    // Load ontology. (It could alternatively be loaded from a PSF file through
    // OntologyPSFLoader.loadFromPSF and the loader's ontology field.)
    System.out.println(StringUtilities.now() + "\tLoading ontology");
    OntologyManager manager = new OntologyManager();
    OntologyStore ontology = manager.fetchStoreFromDatabase("Homologene_curated_may2007");
    ontology.createIndexForDatabaseIDs();

    // Process SwissProt file
    System.out.println(StringUtilities.now() + "\tProcessing SwissProt file");
    ReadTextFile file = new ReadTextFile("/data/SwissProt/uniprot_sprot.dat");
    WriteTextFile log = new WriteTextFile("/temp/EMBLIDlog.txt");
    try {
      addEmblIdentifiers(ontology, file.getIterator(), log);
    } finally {
      // Close the log even when processing fails, so already written
      // messages are not lost and the file handle is released.
      log.close();
    }

    // Save ontology
    System.out.println(StringUtilities.now() + "\tSave ontology");
    OntologyPSFLoader loader = new OntologyPSFLoader();
    loader.ontology = ontology;
    loader.saveToPSF("/temp/Homologe_EMBL.psf");
  }

  /**
   * Scans the lines of a SwissProt flat file and, for every entry of an
   * accepted organism, attaches the entry's EMBL identifiers to the ontology.
   * An entry is considered complete when a line starting with {@code //} is
   * read; identifiers collected for an entry are discarded afterwards.
   *
   * @param ontology ontology that receives the EMBL identifiers
   * @param lines    lines of the SwissProt file, in file order
   * @param log      receives a message for every entry that cannot be linked
   */
  private static void addEmblIdentifiers(OntologyStore ontology, Iterator<String> lines,
      WriteTextFile log) {
    Set<String> swissProtIds = new HashSet<String>();
    Set<String> emblIds = new HashSet<String>();
    boolean correctOrganism = false;

    while (lines.hasNext()) {
      String line = lines.next();
      if (line.startsWith("//")) {
        // End of entry: link what was collected, then reset the state.
        if (correctOrganism) {
          linkEntry(ontology, swissProtIds, emblIds, log);
        }
        swissProtIds.clear();
        emblIds.clear();
        correctOrganism = false;
      } else if (line.startsWith("AC ")) {
        // Guard the fixed offset: a line holding only the prefix would
        // otherwise make substring(4) throw.
        if (line.length() > 4) {
          for (String token : line.substring(4).split(";")) {
            addIfNotBlank(swissProtIds, token);
          }
        }
      } else if (line.startsWith("DR EMBL; ")) {
        // The identifier is the field following the first ';'.
        String[] fields = line.split(";");
        if (fields.length > 1) {
          addIfNotBlank(emblIds, fields[1]);
        }
      } else if (line.startsWith("OS ")) {
        if (isAcceptedOrganism(line)) {
          correctOrganism = true;
        }
      }
    }
  }

  /**
   * Attaches the EMBL identifiers of one entry to the single concept that the
   * entry's SwissProt identifiers resolve to. When the SwissProt identifiers
   * resolve to zero or to several concepts, nothing is attached and a message
   * is written to the log instead.
   *
   * @param ontology     ontology to look up and update
   * @param swissProtIds SwissProt accession numbers of the entry
   * @param emblIds      EMBL identifiers of the entry
   * @param log          receives the message for unlinkable entries
   */
  private static void linkEntry(OntologyStore ontology, Set<String> swissProtIds,
      Set<String> emblIds, WriteTextFile log) {
    Set<Integer> conceptIds = new HashSet<Integer>();
    for (String swissProtId : swissProtIds) {
      conceptIds.addAll(ontology.getConceptIDs(new DatabaseID("SP", swissProtId)));
    }

    if (conceptIds.size() == 1) {
      Integer conceptId = conceptIds.iterator().next();
      for (String emblId : emblIds) {
        ontology.setDatabaseIDForConcept(conceptId, new DatabaseID("EMBL", emblId));
      }
    } else {
      log.writeln("Incorrect number (" + conceptIds.size() + ") of CIDs for :"
          + swissProtIds.toString());
    }
  }

  /**
   * Tells whether an organism line names one of the accepted species. The
   * line has to match one of the three accepted lines exactly.
   *
   * @param line complete organism line, including its prefix
   * @return {@code true} for the human, mouse and rat lines
   */
  private static boolean isAcceptedOrganism(String line) {
    return line.equals("OS Homo sapiens (Human).")
        || line.equals("OS Mus musculus (Mouse).")
        || line.equals("OS Rattus norvegicus (Rat).");
  }

  /**
   * Adds the trimmed token to the target set unless it is empty after
   * trimming, so that blank fields never end up as identifiers.
   *
   * @param target set that collects identifiers
   * @param token  raw field taken from a line
   */
  private static void addIfNotBlank(Set<String> target, String token) {
    String trimmed = token.trim();
    if (trimmed.length() > 0) {
      target.add(trimmed);
    }
  }
}