package com.compomics.util.protein_sequences_manager.gui.sequences_import.taxonomy;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.swing.tree.DefaultMutableTreeNode;
import javax.swing.tree.DefaultTreeModel;
/**
* UniProt taxonomy provider.
*
* @author Kenneth Verheggen
*/
public class UniprotTaxonomyProvider {
/**
* The lowest layer of taxonomy.
*/
private static DefaultMutableTreeNode rootNode;
/**
* The lineages for a certain taxonomy.
*/
private final HashSet<String> lineages = new HashSet<String>();
/**
* The default JTreeModel.
*/
private final DefaultTreeModel model;
/**
* Cache of taxonomies to increase speed and reduce server load.
*/
private final HashMap<String, String> cachedTaxonomies = new HashMap<String, String>();
/**
* GUI constructor.
*
* @param model the tree model you want to update
*/
public UniprotTaxonomyProvider(DefaultTreeModel model) {
rootNode = (DefaultMutableTreeNode) model.getRoot();
this.model = model;
}
/**
* Normal constructor, GUI-less mode.
*/
public UniprotTaxonomyProvider() {
rootNode = new DefaultMutableTreeNode("root");
model = new DefaultTreeModel(rootNode);
}
/**
* Returns the child taxonomies.
*
* @param taxonomyName the taxonomy name
* @return all the children names for a given taxonomy name (or identifier)
* @throws MalformedURLException if a MalformedURLException occurs
* @throws IOException if an IOException occurs
*/
public List<String> getChildTaxonomies(String taxonomyName) throws MalformedURLException, IOException {
List<String> childrenTaxonomies = new ArrayList<String>();
String url = ConnectionManager.getUniprotHost() + "taxonomy/?query=\"" + taxonomyName + "\"&format=tab";
URL website = new URL(url);
URLConnection connection = website.openConnection();
BufferedReader in = null;
try {
in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
String inputLine;
in.readLine();
while ((inputLine = in.readLine()) != null) {
String[] split = inputLine.split("\t");
String taxName = split[2];
String taxID = split[0];
String lineage = split[8] + ";" + taxName;
lineages.add(lineage);
cachedTaxonomies.put(taxName, taxID);
childrenTaxonomies.add(taxName);
}
} catch (ArrayIndexOutOfBoundsException e) {
//this was an endpoint
// @TODO: better error handling
} catch (IOException e) {
// @TODO: better error handling
} finally {
if (in != null) {
in.close();
}
}
return childrenTaxonomies;
}
/**
* Returns the possible lineages for a given taxonomy.
*
* @param taxonomyName the taxonomy name
* @return the possible lineages for a given taxonomy
* @throws MalformedURLException if a MalformedURLException occurs
* @throws IOException if an IOException occurs
* @throws URISyntaxException if a URISyntaxException occurs
*/
public Set<String> getLineagesForTaxonomyID(String taxonomyName) throws MalformedURLException, IOException, IllegalArgumentException, URISyntaxException {
int maxPerQuery = 20000;
String query = "\"" + taxonomyName + "\"" + "&sort=score&format=tab";
URLConnection connection = ConnectionManager.getQueryConnection(query, QueryType.TAXONOMY);
HashMap<String, String> tempTaxIdMap = new HashMap<String, String>();
List<String> tempLineages = new ArrayList<String>();
BufferedReader in = null;
try {
in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
String inputLine;
in.readLine(); // read the header
while ((inputLine = in.readLine()) != null) {
String[] split = inputLine.split("\t");
String taxName = split[2];
String taxID = split[0];
String lineage = split[8] + ";" + taxName;
tempLineages.add(lineage);
tempTaxIdMap.put(taxName, taxID);
if (tempLineages.size() > maxPerQuery) {
throw new IllegalArgumentException("There were over " + maxPerQuery + " lineages found for this query. Try a more specific search or browse manually");
}
}
lineages.addAll(tempLineages);
cachedTaxonomies.putAll(tempTaxIdMap);
} catch (ArrayIndexOutOfBoundsException e) {
//this was an endpoint
// @TODO: better error handling
} catch (IOException e) {
// @TODO: better error handling
} finally {
if (in != null) {
in.close();
}
}
return lineages;
}
/**
* Returns the possible lineages for a given taxonomy
*
* @param taxonomyFile taxonomy file from the UniProt web page
* @return the possible lineages for a given taxonomy
* @throws MalformedURLException if a MalformedURLException occurs
* @throws IOException if an IOException occurs
*/
public Set<String> getLineagesFromFile(File taxonomyFile) throws MalformedURLException, IOException {
BufferedReader in = null;
try {
in = new BufferedReader(new InputStreamReader(new FileInputStream(taxonomyFile)));
String inputLine;
in.readLine(); // read the header
while ((inputLine = in.readLine()) != null) {
if (lineages.size() % 10000 == 0) {
System.out.println(lineages.size() + " processed lineages"); // @TODO: never have prints in a method
}
String[] split = inputLine.split("\t");
lineages.add(split[8] + ";" + split[2]);
cachedTaxonomies.put(split[2], split[0]);
}
} catch (ArrayIndexOutOfBoundsException e) {
//this was an endpoint
// @TODO: better error handling
} catch (IOException e) {
// @TODO: better error handling
} finally {
if (in != null) {
in.close();
}
}
return lineages;
}
/**
* Returns a model after searching for an unknown taxonomy name (for example
* after a search).
*
* @param taxonomyTabFile taxonomy file from the UniProt web page
* @return a model after searching for an unknown taxonomy name (for example
* after a search)
* @throws IOException if an IOException occurs
* @throws InterruptedException if an InterruptedException occurs
*/
public DefaultTreeModel getModelFromFile(File taxonomyTabFile) throws IOException, InterruptedException {
System.out.println("Building Tree"); // @TODO: never have prints in a method
getLineagesFromFile(taxonomyTabFile);
int processed = 0;
for (String lineage : lineages) {
if (processed % 10000 == 0) {
System.out.println(processed + " added in tree"); // @TODO: never have prints in a method
}
String[] taxonomies = lineage.split(";");
DefaultMutableTreeNode parent = new DefaultMutableTreeNode(new String[]{"root", taxonomies[0]});
for (String taxonomy : taxonomies) {
taxonomy = taxonomy.trim();
DefaultMutableTreeNode node = searchNode(taxonomy);
if (node == null) {
node = new DefaultMutableTreeNode(taxonomy);
model.insertNodeInto(node, parent, parent.getChildCount());
}
parent = node;
}
processed++;
}
return model;
}
/**
* Returns a model after searching for an unknown taxonomy name (for example
* after a search).
*
* @param taxonomyName the taxonomy name
* @return a model after searching for an unknown taxonomy name (for example
* after a search)
* @throws MalformedURLException if a MalformedURLException occurs
* @throws IOException if an IOException occurs
* @throws URISyntaxException if a URISyntaxException occurs
*/
public DefaultTreeModel getModelAfterSearch(String taxonomyName) throws IllegalArgumentException, IOException, MalformedURLException, URISyntaxException {
getLineagesForTaxonomyID(taxonomyName);
for (String lineage : lineages) {
String[] taxonomies = lineage.split(";");
DefaultMutableTreeNode parent = new DefaultMutableTreeNode(new String[]{"root", taxonomies[0]});
for (String taxonomy : taxonomies) {
taxonomy = taxonomy.trim();
DefaultMutableTreeNode node = searchNode(taxonomy);
if (node == null) {
node = new DefaultMutableTreeNode(taxonomy);
model.insertNodeInto(node, parent, parent.getChildCount());
}
parent = node;
}
}
return model;
}
/**
* Returns a model for a known taxonomy name (for example after node click).
*
* @param taxonomyName the taxonomy name
* @return a model for a known taxonomy name (for example after node click)
* @throws IOException if an IOException occurs
*/
public DefaultTreeModel getModelAfterClick(String taxonomyName) throws IOException {
List<String> childTaxonomies = getChildTaxonomies(taxonomyName);
for (String lineage : childTaxonomies) {
String[] taxonomies = lineage.split(";");
DefaultMutableTreeNode parent = searchNode(taxonomyName);
DefaultMutableTreeNode node;
for (String taxonomy : taxonomies) {
taxonomy = taxonomy.trim();
node = new DefaultMutableTreeNode(taxonomy);
try {
model.insertNodeInto(node, parent, parent.getChildCount());
} catch (NullPointerException e) {
//no more children here
}
}
}
return model;
}
/**
* Returns the given node.
*
* @param nodeStr the node string.
* @return the given node
*/
private DefaultMutableTreeNode searchNode(String nodeStr) {
DefaultMutableTreeNode node;
Enumeration e = rootNode.breadthFirstEnumeration();
while (e.hasMoreElements()) {
node = (DefaultMutableTreeNode) e.nextElement();
if (nodeStr.equals(node.getUserObject().toString())) {
return node;
}
}
return null;
}
/**
* Returns the taxonomyID that was encountered for a taxonomyName. This is
* required to speed up the tree considerably.
*
* @param taxonomyName the taxonomy name
* @return the taxonomyID
*/
public String getCachedTaxonomyID(String taxonomyName) {
return cachedTaxonomies.get(taxonomyName);
}
/**
* Returns the query taxonomy.
*
* @param queryTerm the term you wish to search for (example "human" or
* 9606)
* @param returnID boolean indicating whether you want the taxonomyID or the
* taxonomy name
* @return the query taxonomy
* @throws MalformedURLException if a MalformedURLException occurs
* @throws IOException if an IOException occurs
*/
public String queryTaxonomy(String queryTerm, boolean returnID) throws MalformedURLException, IOException {
String url = ConnectionManager.getUniprotHost() + "taxonomy/?query=" + queryTerm + "&sort=score&format=tab";
URL website = new URL(url);
String taxName = "";
String taxID = "";
URLConnection connection = website.openConnection();
HashMap<String, String> tempTaxIdMap = new HashMap<String, String>();
try {
BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
String inputLine;
in.readLine(); // read the header
if ((inputLine = in.readLine()) != null) {
String[] split = inputLine.split("\t");
taxName = split[2];
taxID = split[0];
tempTaxIdMap.put(taxName, taxID);
}
if (!taxName.isEmpty() && !taxID.isEmpty()) {
cachedTaxonomies.put(taxName, taxID);
}
} catch (ArrayIndexOutOfBoundsException ex) {
//this was an endpoint?
} catch (IOException ex) {
ex.printStackTrace(); // @TODO: better error handling
}
if (returnID) {
return taxID;
} else {
return taxName;
}
}
}