/* * Concept profile generation and analysis for Gene-Disease paper * Copyright (C) 2015 Biosemantics Group, Leiden University Medical Center * Leiden, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package analysis; import static KnowledgeTransfer.ConceptProfileUtil.loadConceptFrequencies; import static KnowledgeTransfer.ConceptProfileUtil.readCidFile; import static KnowledgeTransfer.PathConfigs.ANALYSIS_DIR; import static KnowledgeTransfer.PathConfigs.CONCEPT_FREQUENCIES_FILENAME; import static KnowledgeTransfer.PathConfigs.THESAURUS_DISEASE_CIDS; import static KnowledgeTransfer.PathConfigs.HPRD_GENE_CIDS; import static java.util.Collections.reverseOrder; import static java.util.Collections.singletonList; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; import org.erasmusmc.utilities.WriteCSVFile; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Stopwatch; public class Figure2 { private static Logger _LOG = LoggerFactory.getLogger(Figure2.class); public static List<Integer> getFrequenciesForConcepts(Map<Integer, Integer> conceptid2frequency, List<Integer> concepts) { List<Integer> result = new ArrayList<Integer>(); for (Integer concept : concepts) { Integer freq = conceptid2frequency.get(concept); if (freq != null) { result.add(freq); } } return result; } public static void writeColumns(String fileName, List<Integer> freqs) { WriteCSVFile output = new WriteCSVFile(fileName); int count = 1; output.write(Arrays.asList("Rank", "Frequency")); for (Integer freq : freqs) { String col1 = Integer.toString(count++); String col2 = freq.toString(); output.write(Arrays.asList(col1, col2)); } output.close(); } public static void main(String[] args) { Stopwatch stopWatch = Stopwatch.createStarted(); Map<Integer, Integer> conceptid2frequency = loadConceptFrequencies(CONCEPT_FREQUENCIES_FILENAME); // First for genes: List<Integer> cids = readCidFile(HPRD_GENE_CIDS); List<Integer> result = getFrequenciesForConcepts(conceptid2frequency, cids); Collections.sort(result, reverseOrder()); writeColumns(ANALYSIS_DIR + "gene_literature_freq.csv", result); // Diseases: cids = readCidFile(THESAURUS_DISEASE_CIDS); result = getFrequenciesForConcepts(conceptid2frequency, cids); Collections.sort(result, Collections.reverseOrder()); writeColumns(ANALYSIS_DIR + "disease_literature_freq.csv", result); _LOG.info("{} ", stopWatch);// stopWatch } }