/* * Concept profile generation tool suite * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center, * Rotterdam, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ /* * Adds affymetrix IDs to an ontology */ package org.erasmusmc.dataimport.genes; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import org.erasmusmc.ids.DatabaseID; import org.erasmusmc.ontology.Concept; import org.erasmusmc.ontology.Ontology; import org.erasmusmc.utilities.StringUtilities; public class Affymetrix { public static String libraryFolder = "/home/public/thesauri/affymetrix/"; public Affymetrix(Ontology ontology){ fetchAffyIDs(); insertAffyIDs(ontology); } private void insertAffyIDs(Ontology ontology) { System.out.println(StringUtilities.now()+"\tInserting Affymetrix IDs"); int count = 0; Iterator<Concept> conceptIterator = ontology.getConceptIterator(); while (conceptIterator.hasNext()){ int conceptID = conceptIterator.next().getID(); List<DatabaseID> dblinks = ontology.getDatabaseIDsForConcept(conceptID); if (dblinks != null){ Set<DatabaseID> affyIDs = new HashSet<DatabaseID>(); for (DatabaseID dblink : dblinks){ Set<DatabaseID> affyID = dblink2affys.get(dblink); if (affyID != null) affyIDs.addAll(affyID); } for (DatabaseID affyID : affyIDs) ontology.setDatabaseIDForConcept(conceptID, affyID); count += affyIDs.size(); } } System.out.println(StringUtilities.now()+"\tInserted number of ids: "+count); } private void fetchAffyIDs() { File f = new File(libraryFolder); String[] filenames = f.list(); for (String filename : filenames) if (filename.toLowerCase().endsWith(".gin")){ processFile(filename); } System.out.println("Number of links found: "+dblink2affys.size()); } private void processFile(String filename) { System.out.println(StringUtilities.now()+"\tNow reading file: "+filename); try { FileInputStream PSFFile = new FileInputStream(libraryFolder+filename); BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(PSFFile),1000000); try { while (bufferedReader.ready() && !bufferedReader.readLine().startsWith("1")){} //Skip header while (bufferedReader.ready()){ processLine(bufferedReader.readLine()); } bufferedReader.close(); } catch (IOException e) { e.printStackTrace(); } } catch (FileNotFoundException e){ e.printStackTrace(); } } private void processLine(String string) { String affyID = getAffyID(string); List<DatabaseID> dblinks = getDatabaseIDs(string); if (dblinks.size() != 0){ DatabaseID affyLink = new DatabaseID("AF", affyID); for (DatabaseID dblink : dblinks) { Set<DatabaseID> affyIDs = dblink2affys.get(dblink); if (affyIDs == null){ affyIDs = new HashSet<DatabaseID>(); dblink2affys.put(dblink, affyIDs); } affyIDs.add(affyLink); } } } private List<DatabaseID> getDatabaseIDs(String string) { List<DatabaseID> result = new ArrayList<DatabaseID>(); String UG = getDBID(string, "/UG="); if (!UG.equals("")) result.add(new DatabaseID("UG", UG)); String EG = getDBID(string, "/LL="); if (!EG.equals("")) result.add(new DatabaseID("EG", EG)); return result; } private String getDBID(String string, String prefix) { int start = string.indexOf(prefix); if (start != -1){ start += prefix.length(); int i = start; while (i < string.length() && !Character.isWhitespace(string.charAt(i))) i++; return string.substring(start, i); } return ""; } private String getAffyID(String string) { int start = string.indexOf("\t\t\t"); if (start != -1){ int end = string.indexOf("\t", start+4); return string.substring(start+3, end); } return ""; } //private Map<DatabaseID, DatabaseID> dblink2affy = new HashMap<DatabaseID, DatabaseID>(); private Map<DatabaseID, Set<DatabaseID>> dblink2affys = new HashMap<DatabaseID, Set<DatabaseID>>(); }