package com.compomics.util.experiment.biology.genes.go;

import com.compomics.util.waiting.WaitingHandler;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;

/**
 * Class parsing BioMart protein GO mappings and storing them in maps.
 *
 * @author Marc Vaudel
 * @author Harald Barsnes
 */
public class GoMapping {

    /**
     * The separator used to separate line contents.
     */
    public final static String SEPARATOR = "\t";
    /**
     * Protein accession to GO term accessions map.
     */
    private HashMap<String, HashSet<String>> proteinToGoMap;
    /**
     * GO term accession to protein accessions map.
     */
    private HashMap<String, HashSet<String>> goToProteinMap;
    /**
     * GO term accession to (lower case) name map.
     */
    private HashMap<String, String> goAccessionsToNamesMap;
    /**
     * GO term (lower case) name to accession map.
     */
    private HashMap<String, String> goNamesToAccessionsMap;
    /**
     * A sorted list of GO term names. Built lazily by
     * {@link #getSortedTermNames()} and reset to null whenever a new file is
     * loaded.
     */
    private ArrayList<String> sortedTermNames;

    /**
     * Constructor. Creates a mapping without any content.
     */
    public GoMapping() {
        proteinToGoMap = new HashMap<String, HashSet<String>>();
        goToProteinMap = new HashMap<String, HashSet<String>>();
        goAccessionsToNamesMap = new HashMap<String, String>();
        goNamesToAccessionsMap = new HashMap<String, String>();
    }

    /**
     * Reads GO mappings from a BioMart file. Every line of the file should
     * consist of three tab separated columns: protein accession, GO accession
     * and GO name. Lines which do not have exactly three columns, or which
     * have an empty protein accession or GO accession, are ignored. GO names
     * are stored in lower case.
     *
     * Mappings loaded previously are not cleared: the content of the file is
     * added to them, and the name stored for a GO accession (or the accession
     * stored for a GO name) is silently overwritten when it is encountered
     * again.
     *
     * @param file the file containing the GO mapping
     * @param waitingHandler a waiting handler allowing canceling of the
     * process, can be null. When the run is canceled the method returns
     * directly and the mappings read so far are kept.
     *
     * @throws IOException if an exception occurs while reading the file
     */
    public void loadMappingsFromFile(File file, WaitingHandler waitingHandler) throws IOException {

        // the cached list of names is no longer valid once new mappings are added
        sortedTermNames = null;

        // read the mappings line by line
        FileReader r = new FileReader(file);
        try {
            BufferedReader br = new BufferedReader(r);
            try {
                String line;
                while ((line = br.readLine()) != null) {

                    String[] splittedLine = line.split(SEPARATOR);

                    if (splittedLine.length == 3 && !splittedLine[0].equals("") && !splittedLine[1].equals("")) {

                        String proteinAccession = splittedLine[0];
                        String goTermAccession = splittedLine[1];
                        // use a fixed locale so that the names, which are also used as
                        // map keys, do not depend on the default locale of the JVM
                        String goTermName = splittedLine[2].toLowerCase(Locale.ENGLISH);

                        addToSetMap(proteinToGoMap, proteinAccession, goTermAccession);
                        addToSetMap(goToProteinMap, goTermAccession, proteinAccession);

                        goAccessionsToNamesMap.put(goTermAccession, goTermName);
                        goNamesToAccessionsMap.put(goTermName, goTermAccession);
                    }

                    // the cancel status is checked once per line, after the line was processed
                    if (waitingHandler != null && waitingHandler.isRunCanceled()) {
                        return;
                    }
                }
            } finally {
                br.close();
            }
        } finally {
            r.close();
        }
    }

    /**
     * Adds a value to the set stored under the given key, creating the set if
     * the key is not yet in the map.
     *
     * @param map the map to update
     * @param key the key of the set
     * @param value the value to add to the set
     */
    private static void addToSetMap(HashMap<String, HashSet<String>> map, String key, String value) {
        HashSet<String> values = map.get(key);
        if (values == null) {
            values = new HashSet<String>();
            map.put(key, values);
        }
        values.add(value);
    }

    /**
     * Returns the GO accessions linked to a given protein accession. Null if
     * not found.
     *
     * @param proteinAccession the accession of the protein of interest
     *
     * @return the set of GO accession numbers as stored in this mapping, null
     * if no mapping is found
     */
    public HashSet<String> getGoAccessions(String proteinAccession) {
        return proteinToGoMap.get(proteinAccession);
    }

    /**
     * Returns the protein accessions linked to a given GO term. Null if not
     * found.
     *
     * @param goTermAccession the accession of the GO term
     *
     * @return the set of protein accessions as stored in this mapping, null if
     * no mapping is found
     */
    public HashSet<String> getProteinAccessions(String goTermAccession) {
        return goToProteinMap.get(goTermAccession);
    }

    /**
     * Returns the name of a GO term.
     *
     * @param goAccession the accession number of the GO term of interest
     *
     * @return the name in lower case, null if not found
     */
    public String getTermName(String goAccession) {
        return goAccessionsToNamesMap.get(goAccession);
    }

    /**
     * Returns the accession of a GO term. Names are stored in lower case when
     * loading a file, the name given here is looked up as is.
     *
     * @param goName the name of the GO term of interest
     *
     * @return the accession, null if not found
     */
    public String getTermAccession(String goName) {
        return goNamesToAccessionsMap.get(goName);
    }

    /**
     * Returns the GO accession to name map. The map returned is the one used
     * internally by this mapping, not a copy.
     *
     * @return the GO accession to name map
     */
    public HashMap<String, String> getGoNamesMap() {
        return goAccessionsToNamesMap;
    }

    /**
     * Returns the protein to GO accession map. The map returned is the one
     * used internally by this mapping, not a copy.
     *
     * @return the protein to GO accession map
     */
    public HashMap<String, HashSet<String>> getProteinToGoMap() {
        return proteinToGoMap;
    }

    /**
     * Returns the GO to protein accession map. The map returned is the one
     * used internally by this mapping, not a copy.
     *
     * @return the GO to protein accession map
     */
    public HashMap<String, HashSet<String>> getGoToProteinMap() {
        return goToProteinMap;
    }

    /**
     * Returns a sorted list of all GO term names linked to proteins in the
     * proteinToGoMap. The list is sorted according to the natural ordering of
     * the names and is cached until a new file is loaded.
     *
     * @return a sorted list of all GO term names
     */
    public ArrayList<String> getSortedTermNames() {
        if (sortedTermNames == null) {

            // collect the names in a set first as a term is usually linked to many proteins
            HashSet<String> goNames = new HashSet<String>(goAccessionsToNamesMap.size());
            for (HashSet<String> goAccessions : proteinToGoMap.values()) {
                for (String goAccession : goAccessions) {
                    String goName = getTermName(goAccession);
                    if (goName != null) {
                        goNames.add(goName);
                    }
                }
            }

            sortedTermNames = new ArrayList<String>(goNames);
            Collections.sort(sortedTermNames);
        }
        return sortedTermNames;
    }
}