package com.compomics.util.experiment.biology.taxonomy.mappings; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.HashMap; import org.apache.commons.httpclient.URIException; import org.apache.commons.httpclient.util.URIUtil; /** * Mapping of the UniProt taxonomy. * * @author Marc Vaudel */ public class UniprotTaxonomy { /** * The separator used to separate line contents. */ public final static String SEPARATOR = "\t"; /** * UniProt species name to NCBI ID. */ private HashMap<String, Integer> nameToIdMap; /** * NCBI ID to Latin name. */ private HashMap<Integer, String> idToNameMap; /** * NCBI ID to common name. */ private HashMap<Integer, String> idToCommonNameMap; /** * The local file to store the mappings. */ private File mappingFile; /** * Constructor. */ public UniprotTaxonomy() { nameToIdMap = new HashMap<String, Integer>(); idToNameMap = new HashMap<Integer, String>(); idToCommonNameMap = new HashMap<Integer, String>(); } /** * Loads the species mapping from a file. Previous mapping will be * overwritten. * * @param speciesFile the species file * * @throws IOException exception thrown whenever an error occurred while * reading the file. */ public void loadMapping(File speciesFile) throws IOException { this.mappingFile = speciesFile; // read the species list FileReader r = new FileReader(speciesFile); try { BufferedReader br = new BufferedReader(r); try { String line = br.readLine(); while ((line = br.readLine()) != null) { line = line.trim(); if (line.length() > 0) { String[] elements = line.split(SEPARATOR); Integer id = new Integer(elements[0].trim()); String latinName = elements[2].trim(); String commonName = elements[3].trim(); nameToIdMap.put(latinName, id); idToNameMap.put(id, latinName); if (!commonName.equals("")) { idToCommonNameMap.put(id, commonName); } } } } finally { br.close(); } } finally { r.close(); } } /** * Downloads the mapping for the given species name from UniProt and saves * it to the mapping file. * * @param name the name of the species to query * * @throws MalformedURLException exception thrown whenever the query URL is * malformed * @throws URIException exception thrown whenever an error occurred while * downloading the mapping * @throws IOException exception thrown whenever an error occurred while * downloading the mapping */ public void downloadMapping(String name) throws MalformedURLException, URIException, IOException { String query = URIUtil.encodeQuery(name); URL url = new URL("http://www.uniprot.org/taxonomy/?sort=score&desc=&compress=no&query=" + query + "&format=tab&columns=id"); URLConnection conn = url.openConnection(); BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream())); try { BufferedWriter bw = new BufferedWriter(new FileWriter(mappingFile, true)); try { String line = br.readLine(); while ((line = br.readLine()) != null) { line = line.trim(); if (line.length() > 0) { String[] elements = line.split(SEPARATOR); Integer id = new Integer(elements[0].trim()); String latinName = elements[2].trim(); String commonName = elements[3].trim(); if (!idToNameMap.containsKey(id)) { nameToIdMap.put(latinName, id); idToNameMap.put(id, latinName); if (!commonName.equals("")) { idToCommonNameMap.put(id, commonName); } // Try to save the new mapping try { bw.write(line); bw.newLine(); } catch (Exception e) { e.printStackTrace(); } } } } } finally { bw.close(); } } finally { br.close(); } } /** * Returns the NCBI taxon corresponding to the given species name. Null if * not found. * * @param name the species name * @param query boolean indicating whether UniProt should be queried if the * species is not found * * @return the taxon * * @throws MalformedURLException exception thrown whenever the query URL is * malformed * @throws URIException exception thrown whenever an error occurred while * downloading the mapping * @throws IOException exception thrown whenever an error occurred while * downloading the mapping */ public Integer getId(String name, boolean query) throws MalformedURLException, URIException, IOException { Integer result = nameToIdMap.get(name); if (result == null && query) { downloadMapping(name); result = nameToIdMap.get(name); } return result; } /** * Returns the Latin name corresponding to the given NCBI taxon. * * @param id the NCBI taxon * * @return the Latin name */ public String getLatinName(Integer id) { return idToNameMap.get(id); } /** * Returns the common name corresponding to the given NCBI taxon. * * @param id the NCBI taxon * * @return the common name */ public String getCommonName(Integer id) { return idToCommonNameMap.get(id); } /** * Downloads the UniProt taxonomy mapping to the given file. * * @param destinationFile the file where to write the taxonomy file * * @throws IOException Exception thrown whenever an error occurred while * reading or writing data. */ public static void downloadTaxonomyFile(File destinationFile) throws IOException { URL url = new URL("http://www.uniprot.org/taxonomy/?format=tab&columns=id"); URLConnection conn = url.openConnection(); BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream())); try { BufferedWriter bw = new BufferedWriter(new FileWriter(destinationFile)); try { String rowLine; while ((rowLine = br.readLine()) != null) { bw.write(rowLine); bw.newLine(); } } finally { bw.close(); } } finally { br.close(); } } }