/* * Concept profile generation and analysis for Gene-Disease paper * Copyright (C) 2015 Biosemantics Group, Leiden University Medical Center * Leiden, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package analysis; import static KnowledgeTransfer.ConceptProfileUtil.loadConceptFrequencies; import static KnowledgeTransfer.ConceptProfileUtil.readCidFile; import static KnowledgeTransfer.PathConfigs.CONCEPT_FREQUENCIES_FILENAME; import static KnowledgeTransfer.PathConfigs.HPRD_GENE_CIDS; import static KnowledgeTransfer.PathConfigs.THESAURUS_DISEASE_CIDS; import java.io.FileNotFoundException; import java.io.PrintWriter; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; import org.erasmusmc.ontology.Ontology; import org.erasmusmc.ontology.OntologyManager; import KnowledgeTransfer.PathConfigs; public class MostLeastCommonConcepts { private static final int MAX_ITEMS = 25; public static void main(String[] args) { OntologyManager ontologyManager = new OntologyManager(); Ontology ontology = ontologyManager.fetchClient(PathConfigs.ONTOLOGY_NAME); PrintWriter output = null; try { output = new PrintWriter(PathConfigs.RESULTS_BASE_DIR + "BestAndWorstConcepts.tsv"); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } final HashMap<Integer,Integer> conceptid2frequency = loadConceptFrequencies(CONCEPT_FREQUENCIES_FILENAME); List<Integer> genes = readCidFile(HPRD_GENE_CIDS); List<Integer> diseases = readCidFile(THESAURUS_DISEASE_CIDS); Collections.sort(genes, new Comparator<Integer>() { @Override public int compare(Integer o1, Integer o2) { return getCidFreq(o1, conceptid2frequency).compareTo(getCidFreq(o2, conceptid2frequency)); } }); for(Integer i=0; i<MAX_ITEMS; i++) { Integer most = genes.get(genes.size()-i-1); Integer least = genes.get(i); output.print(String.format("%10s\t %6d\t", ontology.getConcept(least).getTerms().get(0), getCidFreq(least, conceptid2frequency))); output.println(String.format("%10s\t %6d", ontology.getConcept(most).getTerms().get(0), getCidFreq(most, conceptid2frequency))); } output.println(""); Collections.sort(diseases, new Comparator<Integer>() { @Override public int compare(Integer o1, Integer o2) { return getCidFreq(o1, conceptid2frequency).compareTo(getCidFreq(o2, conceptid2frequency)); } }); for(Integer i=0; i<MAX_ITEMS; i++) { Integer most = diseases.get(diseases.size()-i-1); Integer least = diseases.get(i); output.print(String.format("%40s\t %6d\t", ontology.getConcept(least).getTerms().get(0), getCidFreq(least, conceptid2frequency))); output.println(String.format("%40s\t %6d", ontology.getConcept(most).getTerms().get(0), getCidFreq(most, conceptid2frequency))); } output.close(); } private static Integer getCidFreq(Integer cid, Map<Integer,Integer> conceptid2frequency) { Integer result = conceptid2frequency.get(cid); if(result == null) result = 0; return result; } }