package com.compomics.util.experiment.biology.taxonomy.mappings;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
/**
* This class provides information about the species mapping in Ensembl Genomes
* (Bacteria, Fungi, Metazoa, Plants, Protists).
*
* @author Marc Vaudel
*/
public class EnsemblGenomesSpecies {

    /**
     * The separator used to separate line contents.
     */
    public final static String SEPARATOR = "\t";
    /**
     * Index of the column containing the species name.
     */
    private static final int NAME_COLUMN = 0;
    /**
     * Index of the column containing the Ensembl division.
     */
    private static final int DIVISION_COLUMN = 2;
    /**
     * Index of the column containing the NCBI taxonomy ID.
     */
    private static final int ID_COLUMN = 3;
    /**
     * Index of the column containing the Ensembl assembly.
     */
    private static final int ASSEMBLY_COLUMN = 4;
    /**
     * Minimal number of columns a data line must contain to be parsed.
     */
    private static final int MIN_COLUMNS = 5;
    /**
     * NCBI ID to name.
     */
    private final HashMap<Integer, String> idToNameMap;
    /**
     * NCBI ID to Ensembl division.
     */
    private final HashMap<Integer, String> idToDivisionMap;
    /**
     * NCBI ID to Ensembl assembly.
     */
    private final HashMap<Integer, String> idToAssemblyMap;

    /**
     * Enum of the different Ensembl genome divisions.
     */
    public static enum EnsemblGenomeDivision {

        bacteria("EnsemblBacteria", "bacteria"),
        fungi("EnsemblFungi", "fungi"),
        metazoa("EnsemblMetazoa", "metazoa"),
        plants("EnsemblPlants", "plants"),
        protists("EnsemblProtists", "protists");
        /**
         * The name in the Ensembl mapping file.
         */
        public final String ensemblName;
        /**
         * The schema name for XML queries.
         */
        public final String ensemblType;

        /**
         * Constructor.
         *
         * @param ensemblName the name in the Ensembl mapping file
         * @param ensemblType the Ensembl type for XML queries
         */
        private EnsemblGenomeDivision(String ensemblName, String ensemblType) {
            this.ensemblName = ensemblName;
            this.ensemblType = ensemblType;
        }

        /**
         * Returns the EnsemblGenomeDivision corresponding to the given Ensembl
         * name. Null if not found. The comparison is case sensitive.
         *
         * @param ensemblName the Ensembl name
         *
         * @return the EnsemblGenomeDivision, null if no division has this name
         */
        public static EnsemblGenomeDivision getEnsemblGenomeDivisionFromName(String ensemblName) {
            for (EnsemblGenomeDivision ensemblGenomeDivision : values()) {
                if (ensemblGenomeDivision.ensemblName.equals(ensemblName)) {
                    return ensemblGenomeDivision;
                }
            }
            return null;
        }
    }

    /**
     * Constructor. Creates an empty mapping.
     */
    public EnsemblGenomesSpecies() {
        idToNameMap = new HashMap<Integer, String>();
        idToDivisionMap = new HashMap<Integer, String>();
        idToAssemblyMap = new HashMap<Integer, String>();
    }

    /**
     * Loads the species mapping from a file. The first line of the file is
     * considered to be a header and is skipped, blank lines are ignored. Every
     * other line must contain at least five columns separated by
     * {@link #SEPARATOR}: the name in the first column, the division in the
     * third, the NCBI taxonomy ID in the fourth, and the assembly in the fifth.
     *
     * Entries already present for a taxon found in the file are overwritten,
     * entries for taxons absent from the file are kept. If a malformed line is
     * encountered, the lines preceding it have already been loaded.
     *
     * @param speciesFile the species file
     *
     * @throws IOException exception thrown whenever an error occurred while
     * reading the file, or when a line has too few columns or a taxonomy ID
     * that is not an integer.
     */
    public void loadMapping(File speciesFile) throws IOException {

        // read the species list
        FileReader r = new FileReader(speciesFile);
        try {
            BufferedReader br = new BufferedReader(r);
            try {

                // the first line is the header, skip it
                br.readLine();
                int lineNumber = 1;

                String line;
                while ((line = br.readLine()) != null) {
                    lineNumber++;
                    line = line.trim();
                    if (line.length() == 0) {
                        continue;
                    }

                    String[] elements = line.split(SEPARATOR);
                    if (elements.length < MIN_COLUMNS) {
                        throw new IOException("Unexpected number of columns (" + elements.length
                                + ", expected at least " + MIN_COLUMNS + ") at line " + lineNumber
                                + " of " + speciesFile.getAbsolutePath() + ".");
                    }

                    Integer id;
                    try {
                        id = Integer.valueOf(elements[ID_COLUMN].trim());
                    } catch (NumberFormatException e) {
                        // report the parsing error as a file format issue, keeping the cause
                        throw new IOException("Invalid taxonomy ID '" + elements[ID_COLUMN]
                                + "' at line " + lineNumber + " of "
                                + speciesFile.getAbsolutePath() + ".", e);
                    }

                    idToNameMap.put(id, elements[NAME_COLUMN].trim());
                    idToDivisionMap.put(id, elements[DIVISION_COLUMN].trim());
                    idToAssemblyMap.put(id, elements[ASSEMBLY_COLUMN].trim());
                }
            } finally {
                br.close();
            }
        } finally {
            r.close();
        }
    }

    /**
     * Returns the name corresponding to the given NCBI taxon.
     *
     * @param taxon the NCBI taxon
     *
     * @return the name, null if the taxon is not in the mapping
     */
    public String getName(Integer taxon) {
        return idToNameMap.get(taxon);
    }

    /**
     * Returns the division corresponding to the given NCBI taxon.
     *
     * @param taxon the NCBI taxon
     *
     * @return the division, null if the taxon is not in the mapping or if the
     * division name loaded for this taxon does not match any
     * EnsemblGenomeDivision
     */
    public EnsemblGenomeDivision getDivision(Integer taxon) {
        String ensemblDivisionName = idToDivisionMap.get(taxon);
        if (ensemblDivisionName == null) {
            return null;
        }
        return EnsemblGenomeDivision.getEnsemblGenomeDivisionFromName(ensemblDivisionName);
    }

    /**
     * Returns the Ensembl assembly corresponding to the given NCBI taxon.
     *
     * @param taxon the NCBI taxon
     *
     * @return the Ensembl assembly, null if the taxon is not in the mapping
     */
    public String getAssembly(Integer taxon) {
        return idToAssemblyMap.get(taxon);
    }

    /**
     * Returns the taxons in this map. The returned set is a copy, modifying it
     * does not affect the mapping.
     *
     * @return the taxons in this map
     */
    public HashSet<Integer> getTaxons() {
        return new HashSet<Integer>(idToAssemblyMap.keySet());
    }
}