/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package org.erasmusmc.dataimport.genes;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.erasmusmc.dataimport.UMLS.UMLSFilteringAfterOntologyCreation;
import org.erasmusmc.ids.DatabaseID;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.ontology.OntologyPSFLoader;
import org.erasmusmc.ontology.OntologyStore;
import org.erasmusmc.ontology.TermStore;
import org.erasmusmc.ontology.ontologyutilities.HomonymAnalyzer;
import org.erasmusmc.ontology.ontologyutilities.OntologyUtilities;
import org.erasmusmc.utilities.StringUtilities;
/**
 * One-off tool that merges the Swiss-Prot yeast thesaurus into the SGD thesaurus.
 *
 * <p>Both thesauri are read from PSF files. The Swiss-Prot ontology is first
 * cleaned (see {@link #cleanup(OntologyStore)}); then every Swiss-Prot concept
 * whose database IDs resolve to exactly one SGD concept has its terms and
 * database IDs added to that SGD concept. Swiss-Prot concepts that resolve to
 * zero or to several SGD concepts are only reported on standard output. The
 * enriched SGD ontology is finally written to a new PSF file.
 *
 * <p>All work is performed by the constructor; {@link #main(String[])} merely
 * instantiates the class.
 */
public class MergeSGDAndSwissProt {

    /** PSF file holding the SGD thesaurus that is the merge target. */
    private static final String SGD_PSF_FILE =
            "/home/public/Thesauri/GenesNonHuman/SGD_june2007.psf";

    /** PSF file holding the Swiss-Prot yeast thesaurus that is merged in. */
    private static final String SP_PSF_FILE =
            "/home/public/Thesauri/GenesNonHuman/SP_yeast.psf";

    /** PSF file that receives the merged thesaurus. */
    private static final String MERGED_PSF_FILE =
            "/home/public/Thesauri/GenesNonHuman/GenelistYeast_june2007_2.psf";

    /**
     * Command-line entry point.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new MergeSGDAndSwissProt();
    }

    /**
     * Runs the complete pipeline: load both thesauri, clean Swiss-Prot, merge
     * it into SGD and save the result.
     */
    public MergeSGDAndSwissProt() {
        System.out.println(StringUtilities.now() + "\tLoading ontologies");
        OntologyPSFLoader loader = new OntologyPSFLoader();
        loader.loadFromPSF(SGD_PSF_FILE);
        OntologyStore sgd = loader.ontology;
        // NOTE(review): this intermediate save looks like a debugging leftover
        // (and "/temp" is probably not an existing directory on Linux); it is
        // kept to preserve behavior -- confirm whether it can be dropped.
        loader.saveToPSF("/temp/test.psf");
        // NOTE(review): the same loader is reused, so this relies on
        // loadFromPSF assigning a fresh OntologyStore to loader.ontology;
        // otherwise sgd and sp would refer to the same store -- confirm.
        loader.loadFromPSF(SP_PSF_FILE);
        OntologyStore sp = loader.ontology;

        cleanup(sp);
        sgd.createIndexForDatabaseIDs();

        System.out.println(StringUtilities.now() + "\tMerging");
        mergeInto(sgd, sp);

        loader = new OntologyPSFLoader();
        loader.ontology = sgd;
        loader.saveToPSF(MERGED_PSF_FILE);
    }

    /**
     * Adds the terms and database IDs of each Swiss-Prot concept to the single
     * SGD concept that shares a database ID with it.
     *
     * <p>Concepts with no match or with more than one match are left out of
     * the merge and reported on standard output.
     *
     * @param sgd merge target; its database-ID index must already be built
     * @param sp  cleaned Swiss-Prot ontology to merge from
     */
    private void mergeInto(OntologyStore sgd, OntologyStore sp) {
        Iterator<Concept> iterator = sp.getConceptIterator();
        while (iterator.hasNext()) {
            Concept concept = iterator.next();
            List<DatabaseID> databaseIDs = sp.getDatabaseIDsForConcept(concept.getID());

            // Collect every SGD concept reachable through any of the database IDs.
            Set<Integer> cuis = new HashSet<Integer>();
            // Defensive: a concept without database IDs may yield null here
            // (TODO confirm against OntologyStore); treat it as "no match"
            // instead of aborting the whole run with a NullPointerException.
            if (databaseIDs != null) {
                for (DatabaseID databaseID : databaseIDs) {
                    cuis.addAll(sgd.getConceptIDs(databaseID));
                }
            }

            if (cuis.size() == 0) {
                System.out.println("No SGD ids found for SP concept " + concept.getName() + "\t" + concept.getID());
            } else if (cuis.size() > 1) {
                System.out.println("Multiple SGD ids found for SP concept " + concept.getName());
            } else {
                // Exactly one match: databaseIDs is necessarily non-null here.
                int cui = cuis.iterator().next();
                List<TermStore> terms = sgd.getConcept(cui).getTerms();
                terms.addAll(concept.getTerms());
                for (DatabaseID databaseID : databaseIDs) {
                    sgd.setDatabaseIDForConcept(cui, databaseID);
                }
                OntologyUtilities.removeDuplicateTerms(terms);
            }
        }
    }

    /**
     * Prepares the Swiss-Prot ontology for merging, modifying it in place.
     *
     * <p>Every concept that the {@link HomonymAnalyzer} reports as involved in
     * a homonym relation (on either side) is removed. From the remaining
     * concepts, every term whose text matches
     * {@code UMLSFilteringAfterOntologyCreation.ECPattern} is removed
     * (presumably EC enzyme numbers -- verify against that pattern).
     *
     * @param sp the Swiss-Prot ontology to clean
     */
    private void cleanup(OntologyStore sp) {
        System.out.println(StringUtilities.now() + "\tCleaning up Swiss-Prot");
        HomonymAnalyzer analyzer = new HomonymAnalyzer();
        analyzer.setOntology(sp);
        Map<Integer, Map<Integer, List<String>>> homonyms = analyzer.compareConcepts();

        // Both the outer key and the inner keys identify concepts in a homonym pair.
        Set<Integer> homonymIDs = new HashSet<Integer>();
        for (Map.Entry<Integer, Map<Integer, List<String>>> entry : homonyms.entrySet()) {
            homonymIDs.addAll(entry.getValue().keySet());
            homonymIDs.add(entry.getKey());
        }

        Iterator<Concept> iterator = sp.getConceptIterator();
        while (iterator.hasNext()) {
            Concept concept = iterator.next();
            if (homonymIDs.contains(concept.getID())) {
                iterator.remove();
            } else {
                // Removal goes through the iterator so the list can be edited while traversed.
                Iterator<TermStore> termIterator = concept.getTerms().iterator();
                while (termIterator.hasNext()) {
                    TermStore term = termIterator.next();
                    if (UMLSFilteringAfterOntologyCreation.ECPattern.matcher(term.text).find()) {
                        termIterator.remove();
                    }
                }
            }
        }
    }
}