package com.compomics.util.experiment.biology.genes;

import com.compomics.util.experiment.biology.genes.ensembl.GeneMapping;
import com.compomics.util.experiment.biology.genes.go.GoMapping;
import com.compomics.util.experiment.identification.protein_sequences.SequenceFactory;
import com.compomics.util.protein.Header;
import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;

/**
 * The gene maps for a given project.
 *
 * Holds the gene name, chromosome, Ensembl and GO term lookup tables. The
 * getters return the internal maps directly (no defensive copy), and the
 * setters replace the internal maps with the given instances.
 *
 * @author Marc Vaudel
 * @author Harald Barsnes
 */
public class GeneMaps implements Serializable {

    // NOTE: the class declares no serialVersionUID, so the field declarations
    // and the non-private members are kept exactly as they were in order not
    // to alter the default computed one. Private helpers do not take part in
    // that computation.

    /**
     * The Ensembl versions for each species.
     */
    private HashMap<String, String> ensemblVersionsMap;
    /**
     * Gene name to EnsemblId map.
     */
    private HashMap<String, String> geneNameToEnsemblIdMap;
    /**
     * Gene name to chromosome name map.
     */
    private HashMap<String, String> geneNameToChromosomeMap;
    /**
     * Protein accession to GO terms map.
     */
    private HashMap<String, HashSet<String>> proteinToGoMap;
    /**
     * GO term to protein accession map.
     */
    private HashMap<String, HashSet<String>> goAccessionToProteinMap;
    /**
     * GO term accession to name map.
     */
    private HashMap<String, String> goNamesMap;

    /**
     * Creates new, empty maps.
     */
    public GeneMaps() {
        ensemblVersionsMap = new HashMap<String, String>();
        geneNameToEnsemblIdMap = new HashMap<String, String>();
        geneNameToChromosomeMap = new HashMap<String, String>();
        proteinToGoMap = new HashMap<String, HashSet<String>>();
        goAccessionToProteinMap = new HashMap<String, HashSet<String>>();
        goNamesMap = new HashMap<String, String>();
    }

    /**
     * Imports the gene maps from a gene mapping. The entries are added to the
     * current maps, entries with the same gene name are overwritten.
     *
     * @param geneMapping a gene mapping
     */
    public void importMaps(GeneMapping geneMapping) {
        geneNameToChromosomeMap.putAll(geneMapping.getGeneNameToChromosome());
        geneNameToEnsemblIdMap.putAll(geneMapping.getGeneNameToAccession());
    }

    /**
     * Imports the GO maps from a GO mapping. The GO names are added to the
     * current names map, and the protein to GO and GO to protein sets are
     * merged into the sets already present for the same key.
     *
     * @param goMapping a go mapping
     */
    public void setMaps(GoMapping goMapping) {
        goNamesMap.putAll(goMapping.getGoNamesMap());
        mergeInto(proteinToGoMap, goMapping.getProteinToGoMap());
        // The GO to protein sets must be registered in the GO to protein map;
        // they were previously put in the protein to GO map by mistake, which
        // left the GO to protein map empty.
        mergeInto(goAccessionToProteinMap, goMapping.getGoToProteinMap());
    }

    /**
     * Adds all values of the source map to the sets of the target map, creating
     * the sets in the target map when a key is not present yet. The sets of the
     * source map are copied and not shared with the target map.
     *
     * @param target the map to add to
     * @param source the map to take the values from
     */
    private static void mergeInto(HashMap<String, HashSet<String>> target, HashMap<String, HashSet<String>> source) {
        for (Map.Entry<String, HashSet<String>> entry : source.entrySet()) {
            String key = entry.getKey();
            HashSet<String> values = target.get(key);
            if (values == null) {
                values = new HashSet<String>();
                target.put(key, values);
            }
            values.addAll(entry.getValue());
        }
    }

    /**
     * Returns the Ensembl version map.
     *
     * @return the Ensembl version map
     */
    public HashMap<String, String> getEnsemblVersionsMap() {
        return ensemblVersionsMap;
    }

    /**
     * Sets the Ensembl version map.
     *
     * @param ensemblVersionsMap the Ensembl version map
     */
    public void setEnsemblVersionsMap(HashMap<String, String> ensemblVersionsMap) {
        this.ensemblVersionsMap = ensemblVersionsMap;
    }

    /**
     * Returns the gene name to Ensembl ID map.
     *
     * @return the gene name to Ensembl ID map
     */
    public HashMap<String, String> getGeneNameToEnsemblIdMap() {
        return geneNameToEnsemblIdMap;
    }

    /**
     * Sets the gene name to Ensembl ID map.
     *
     * @param geneNameToEnsemblIdMap the gene name to Ensembl ID map
     */
    public void setGeneNameToEnsemblIdMap(HashMap<String, String> geneNameToEnsemblIdMap) {
        this.geneNameToEnsemblIdMap = geneNameToEnsemblIdMap;
    }

    /**
     * Returns the gene name to chromosome map.
     *
     * @return the gene name to chromosome map
     */
    public HashMap<String, String> getGeneNameToChromosomeMap() {
        return geneNameToChromosomeMap;
    }

    /**
     * Sets the gene name to chromosome map.
     *
     * @param geneNameToChromosomeMap the gene name to chromosome map
     */
    public void setGeneNameToChromosomeMap(HashMap<String, String> geneNameToChromosomeMap) {
        this.geneNameToChromosomeMap = geneNameToChromosomeMap;
    }

    /**
     * Returns the protein to GO terms accession map.
     *
     * @return the protein to GO terms accession map
     */
    public HashMap<String, HashSet<String>> getProteinToGoMap() {
        return proteinToGoMap;
    }

    /**
     * Sets the protein to GO terms accession map.
     *
     * @param proteinToGoMap the protein to GO terms accession map
     */
    public void setProteinToGoMap(HashMap<String, HashSet<String>> proteinToGoMap) {
        this.proteinToGoMap = proteinToGoMap;
    }

    /**
     * Returns the GO to protein accession map.
     *
     * @return the GO to protein accession map
     */
    public HashMap<String, HashSet<String>> getGoAccessionToProteinMap() {
        return goAccessionToProteinMap;
    }

    /**
     * Sets the GO to protein accession map.
     *
     * @param goAccessionToProteinMap the GO to protein accession map
     */
    public void setGoAccessionToProteinMap(HashMap<String, HashSet<String>> goAccessionToProteinMap) {
        this.goAccessionToProteinMap = goAccessionToProteinMap;
    }

    /**
     * Returns the GO accession to names map.
     *
     * @return the GO accession to names map
     */
    public HashMap<String, String> getGoNamesMap() {
        return goNamesMap;
    }

    /**
     * Sets the GO accession to names map.
     *
     * @param goNamesMap the GO accession to names map
     */
    public void setGoNamesMap(HashMap<String, String> goNamesMap) {
        this.goNamesMap = goNamesMap;
    }

    /**
     * Returns the gene name for a given protein accession. The protein must be
     * from a Uniprot fasta file loaded in the SequenceFactory.
     *
     * @param accession the protein accession
     *
     * @return the gene name
     *
     * @throws java.io.IOException exception thrown whenever an error occurred
     * while reading the fasta file.
     * @throws java.lang.InterruptedException exception thrown whenever an error
     * occurred while waiting for the connection to the fasta file to recover.
     */
    public String getGeneNameForProtein(String accession) throws IOException, InterruptedException {
        Header header = SequenceFactory.getInstance().getHeader(accession);
        return header.getGeneName();
    }

    /**
     * Returns the Ensembl ID corresponding to the given gene name. Null if not
     * found.
     *
     * @param geneName a gene name
     *
     * @return the corresponding Ensembl ID
     */
    public String getEnsemblId(String geneName) {
        return geneNameToEnsemblIdMap.get(geneName);
    }

    /**
     * Returns the chromosome corresponding to a given gene name. Null if not
     * found.
     *
     * @param geneName the gene name
     *
     * @return the chromosome name
     */
    public String getChromosome(String geneName) {
        return geneNameToChromosomeMap.get(geneName);
    }

    /**
     * Returns the go terms accessions for a protein accession. Null if not
     * found.
     *
     * @param proteinAccession a protein accession
     *
     * @return the go terms accessions
     */
    public HashSet<String> getGoTermsForProtein(String proteinAccession) {
        return proteinToGoMap.get(proteinAccession);
    }

    /**
     * Returns the protein accessions for a GO accession. Null if not found.
     *
     * @param goAccession a GO term accession
     *
     * @return the corresponding proteins
     */
    public HashSet<String> getProteinsForGoTerm(String goAccession) {
        return goAccessionToProteinMap.get(goAccession);
    }

    /**
     * Returns the name of a GO term. Null if not found.
     *
     * @param goAccession the accession of the GO term.
     *
     * @return the name of a GO term
     */
    public String getNameForGoTerm(String goAccession) {
        return goNamesMap.get(goAccession);
    }

    /**
     * Returns the GO Term accession corresponding to the given name. Null if
     * not found. The names map is scanned linearly and the first accession
     * encountered with the given name is returned.
     *
     * @param goName the GO name
     *
     * @return the corresponding accession
     */
    public String getGoAccession(String goName) {
        for (Map.Entry<String, String> entry : goNamesMap.entrySet()) {
            String name = entry.getValue();
            // Accessions stored without a name are skipped instead of
            // triggering a NullPointerException.
            if (name != null && name.equals(goName)) {
                return entry.getKey();
            }
        }
        return null;
    }

    /**
     * Returns the go terms names for a protein accession. Null if no GO terms
     * are mapped to the protein. GO terms without a name in the names map are
     * left out of the result.
     *
     * @param proteinAccession a protein accession
     *
     * @return the go terms names
     *
     * @throws java.io.IOException exception thrown whenever an error occurred
     * while reading the FASTA file.
     * @throws java.lang.InterruptedException exception thrown whenever an error
     * occurred while waiting for the connection to the FASTA file to recover.
     */
    public HashSet<String> getGoNamesForProtein(String proteinAccession) throws IOException, InterruptedException {
        HashSet<String> goTerms = getGoTermsForProtein(proteinAccession);
        if (goTerms == null) {
            return null;
        }
        HashSet<String> goNames = new HashSet<String>(goTerms.size());
        for (String goTerm : goTerms) {
            String goName = getNameForGoTerm(goTerm);
            if (goName != null) {
                goNames.add(goName);
            }
        }
        return goNames;
    }

    /**
     * Indicates whether the GO maps are populated, i.e. whether both the GO
     * names map and the protein to GO map contain at least one entry.
     *
     * @return a boolean indicating whether the GO maps are populated
     */
    public boolean hasGoMappings() {
        return !goNamesMap.isEmpty() && !proteinToGoMap.isEmpty();
    }
}