/* * Concept profile generation and analysis for Gene-Disease paper * Copyright (C) 2015 Biosemantics Group, Leiden University Medical Center * Leiden, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package analysis; import static KnowledgeTransfer.ConceptProfileUtil.readCidFile; import static KnowledgeTransfer.PathConfigs.HPRD_GENE_CIDS; import static KnowledgeTransfer.PathConfigs.ONTOLOGY_NAME; import static KnowledgeTransfer.PathConfigs.THESAURUS_DISEASE_CIDS; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.erasmusmc.ontology.Ontology; import org.erasmusmc.ontology.OntologyManager; import org.erasmusmc.utilities.ReadCSVFile; public class AddLabelsToProfiles { public static void main(String[] args) throws IOException { // for (String file: new String[]{"1855923", "3053655"}) { // addLabelToProfile(file); // } addLabelToProfile("data"); } public static void addLabelToProfile(String filename) { File outfile = new File("/tmp/" + filename + "-WithLabels"); // ReadCSVFile input = new ReadCSVFile(PathConfigs.CONCEPT_PROFILES_DIR + filename); ReadCSVFile input = new ReadCSVFile("/tmp/cpgp/results/matchscores.txt"); Iterator<List<String>> it = input.iterator(); PrintWriter output = null; List<Integer> columnsToTranslate = Arrays.asList(0, 1); try { output = new PrintWriter(outfile); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } assert input != null && output != null; OntologyManager ontologyManager = new OntologyManager(); Ontology ontology = ontologyManager.fetchClient(ONTOLOGY_NAME); List<Integer> genes = readCidFile(HPRD_GENE_CIDS); List<Integer> diseases = readCidFile(THESAURUS_DISEASE_CIDS); List<Integer> allConceptIDS = new ArrayList<Integer>(); allConceptIDS.addAll(genes); allConceptIDS.addAll(diseases); int cnt = 0; Map<Integer, String> labels = new HashMap<Integer, String>(); for(Integer concept: allConceptIDS) { if( ++cnt%1000 == 0 ) { System.out.println(cnt); } String label = ontology.getConcept(concept).getTerms().get(0).text; labels.put(concept, label); } System.out.println("Finished loading labels"); cnt = 0; while(it.hasNext()) { if( ++cnt%1000 == 0 ) { System.out.println(cnt); output.flush(); } List<String> fields = it.next(); String result = ""; for(int i=0; i<fields.size(); i++) { if(i!=0) { result += ","; } result += fields.get(i); if(columnsToTranslate.contains(i)) { Integer concept = Integer.parseInt(fields.get(i)); //result += "," + ontology.getConcept(concept).getTerms().get(0); result += ",\"" + labels.get(concept) + "\""; } } output.println(result); // Integer conceptA = Integer.parseInt(fields.get(0)); // Integer conceptB = Integer.parseInt(fields.get(1)); // String uncertaintyCoeff = fields.get(2); // // output.println(String.format("\"%s\",\"%s\",%s", // ontology.getConcept(conceptA).getTerms().get(0), // ontology.getConcept(conceptB).getTerms().get(0), // uncertaintyCoeff)); } output.close(); } }