/******************************************************************************* * GenPlay, Einstein Genome Analyzer * Copyright (C) 2009, 2014 Albert Einstein College of Medicine * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu> * Nicolas Fourel <nicolas.fourel@einstein.yu.edu> * Eric Bouhassira <eric.bouhassira@einstein.yu.edu> * * Website: <http://genplay.einstein.yu.edu> ******************************************************************************/ package edu.yu.einstein.genplay.core.DAS; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.List; import java.util.concurrent.ExecutionException; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.xml.sax.SAXException; import edu.yu.einstein.genplay.core.manager.project.ProjectChromosomes; import edu.yu.einstein.genplay.core.manager.project.ProjectManager; import edu.yu.einstein.genplay.dataStructure.chromosome.Chromosome; import edu.yu.einstein.genplay.dataStructure.enums.AlleleType; import edu.yu.einstein.genplay.dataStructure.gene.Gene; import edu.yu.einstein.genplay.dataStructure.genomeWindow.GenomeWindow; import edu.yu.einstein.genplay.dataStructure.list.genomeWideList.SCWList.SCWList; import edu.yu.einstein.genplay.dataStructure.list.genomeWideList.SCWList.SimpleSCWList.SimpleSCWList; import edu.yu.einstein.genplay.dataStructure.list.genomeWideList.geneList.GeneList; import edu.yu.einstein.genplay.dataStructure.list.genomeWideList.geneList.SimpleGeneList; import edu.yu.einstein.genplay.dataStructure.list.listView.ListView; import edu.yu.einstein.genplay.dataStructure.scoredChromosomeWindow.ScoredChromosomeWindow; /** * Provides tools to connect and retrieve data from a DAS server * <br/>See <a href="http://www.biodas.org/documents/spec.html">http://www.biodas.org/documents/spec.html</a> * @author Julien Lajugie */ public class DASConnector { private final String serverAddress; // address of a DAS Server private final ProjectChromosomes projectChromosomes; // Instance of the Chromosome Manager private String genomeName; // for multi-genome project only. Name of the genome on which the data were mapped private AlleleType alleleType; // for multi-genome project only. Type of allele for synchronization /** * Creates an instance of {@link DASConnector} * @param serverAddress address of a DAS server */ public DASConnector(String serverAddress) { // get rid of spaces if there is some serverAddress = serverAddress.trim(); // add a "/" at the end of the address if there is none if (!serverAddress.substring(serverAddress.length() - 1).equals("/")) { serverAddress += "/"; } this.serverAddress = serverAddress; projectChromosomes = ProjectManager.getInstance().getProjectChromosomes(); } /** * Searches if there is an entry point associated to the specify chromosome in the list of entries * @param chr a {@link Chromosome} * @return the name of the entry. Null if none */ private EntryPoint findEntryPoint(List<EntryPoint> entryPointList, Chromosome chr) { boolean found = false; int i = 0; // we search for an entry point corresponding to the current chromosome while ((i < entryPointList.size()) && (!found)) { //System.out.println(entryPointList.get(i).getID()); if (chr.getName().equalsIgnoreCase(entryPointList.get(i).getID())) { found = true; } else { i++; } } if (found) { // if the stop position of the entry point can't be greater than the one of the chromosome EntryPoint resultEntry = entryPointList.get(i); if (resultEntry.getStop() > chr.getLength()) { resultEntry.setStop(chr.getLength()); } return resultEntry; } else { return null; } } /** * Generates a query for all the data for a specified data source, entry point and das type * @param dataSource a {@link DataSource} * @param entryPoint an {@link EntryPoint} * @param dasType a {@link DASType} * @return a {@link URL} containing the query * @throws MalformedURLException */ private URL generateQuery(DataSource dataSource, EntryPoint entryPoint, DASType dasType) throws MalformedURLException { String URLStr = new String(serverAddress); URLStr += dataSource.getID(); URLStr += "/features?segment="; URLStr += entryPoint.getID(); URLStr += ":"; URLStr += entryPoint.getStart(); URLStr += ","; URLStr += entryPoint.getStop(); URLStr += ";type="; URLStr += dasType.getID(); return new URL(URLStr); } /** * Generates a query for all the data for a specified data source, entry point, das type and data range * @param dataSource a {@link DataSource} * @param entryPoint an {@link EntryPoint} * @param dasType a {@link DASType} * @param genomeWindow a {@link GenomeWindow} * @return a {@link URL} containing the query * @throws MalformedURLException */ private URL generateQuery(DataSource dataSource, EntryPoint entryPoint, DASType dasType, GenomeWindow genomeWindow) throws MalformedURLException { String URLStr = new String(serverAddress); URLStr += dataSource.getID(); URLStr += "/features?segment="; URLStr += entryPoint.getID(); URLStr += ":"; int start; if(genomeWindow.getStart() < entryPoint.getStart()) { start = entryPoint.getStart(); } else { start = genomeWindow.getStart(); } URLStr += start; URLStr += ","; int stop; if(genomeWindow.getStop() > entryPoint.getStop()) { stop = entryPoint.getStop(); } else { stop = genomeWindow.getStop(); } URLStr += stop; URLStr += ";type="; URLStr += dasType.getID(); return new URL(URLStr); } /** * @return the alleleType */ public AlleleType getAlleleType() { return alleleType; } /** * Retrieves a list of DAS types for a specified data Source * @param dataSource a {@link DataSource} * @return a List of {@link DataSource} * @throws IOException * @throws ParserConfigurationException * @throws SAXException */ public List<DASType> getDASTypeList(DataSource dataSource) throws IOException, ParserConfigurationException, SAXException { URL dasTypesURL = new URL(serverAddress + dataSource.getID() + "/types"); URLConnection connection = dasTypesURL.openConnection(); connection.setUseCaches(true); SAXParserFactory parserFactory = SAXParserFactory.newInstance(); parserFactory.setValidating(true); SAXParser parser = parserFactory.newSAXParser(); DASTypeHandler dth = new DASTypeHandler(); parser.parse(connection.getInputStream(), dth); return dth.getDasTypeList(); } /** * Retrieves a list of Data Sources from the DAS server * @return a List of {@link DataSource} * @throws IOException * @throws ParserConfigurationException * @throws SAXException */ public List<DataSource> getDataSourceList() throws IOException, ParserConfigurationException, SAXException { URL dsnURL = new URL(serverAddress + "dsn"); URLConnection connection = dsnURL.openConnection(); connection.setUseCaches(true); SAXParserFactory parserFactory = SAXParserFactory.newInstance(); parserFactory.setValidating(true); SAXParser parser = parserFactory.newSAXParser(); DataSourceHandler dsh = new DataSourceHandler(); connection.getInputStream(); parser.parse(connection.getInputStream(), dsh); return dsh.getDataSourceList(); } /** * Retrieves a list of DAS entry points for a specified data Source * @param dataSource a {@link DataSource} * @return a List of {@link EntryPoint} * @throws IOException * @throws ParserConfigurationException * @throws SAXException */ public List<EntryPoint> getEntryPointList(DataSource dataSource) throws IOException, ParserConfigurationException, SAXException { URL entryPointURL = new URL(serverAddress + dataSource.getID() + "/entry_points"); //System.out.println("Entry Point URL: " + entryPointURL); URLConnection connection = entryPointURL.openConnection(); connection.setUseCaches(true); SAXParserFactory parserFactory = SAXParserFactory.newInstance(); parserFactory.setValidating(true); SAXParser parser = parserFactory.newSAXParser(); EntryPointHandler eph = new EntryPointHandler(); parser.parse(connection.getInputStream(), eph); return eph.getEntryPointList(); } /** * Retrieves a list of genes from a specified Data Source and a specified DAS Type * @param dataSource a {@link DataSource} * @param dasType a {@link DASType} * @return a {@link GeneList} * @throws IOException * @throws ParserConfigurationException * @throws SAXException * @throws ExecutionException * @throws InterruptedException */ public GeneList getGeneList(DataSource dataSource, DASType dasType) throws IOException, ParserConfigurationException, SAXException, InterruptedException, ExecutionException { List<EntryPoint> entryPointList = getEntryPointList(dataSource); List<ListView<Gene>> resultList = new ArrayList<ListView<Gene>>(); for (Chromosome currentChromo: projectChromosomes) { EntryPoint currentEntryPoint = findEntryPoint(entryPointList, currentChromo); // if we found a chromosome retrieve the data and // we create a genelist for this chromosome if (currentEntryPoint != null) { URL queryUrl = generateQuery(dataSource, currentEntryPoint, dasType); URLConnection connection = queryUrl.openConnection(); connection.setUseCaches(true); SAXParserFactory parserFactory = SAXParserFactory.newInstance(); parserFactory.setValidating(true); SAXParser parser = parserFactory.newSAXParser(); GeneHandler gh = new GeneHandler(currentChromo); // if the current project is a muti genome project we set the // name of the genome that was used for the mapping of the data if (ProjectManager.getInstance().isMultiGenomeProject()) { gh.setGenomeName(genomeName); gh.setAlleleType(alleleType); } parser.parse(connection.getInputStream(), gh); ListView<Gene> currentGeneList = gh.getGeneList(); resultList.add(currentGeneList); } else { resultList.add(null); } } return new SimpleGeneList(resultList, null, null); } /** * Retrieves a list of genes from a specified Data Source and a specified DAS Type and a specified Data Range * @param dataSource a {@link DataSource} * @param dasType a {@link DASType} * @param genomeWindow a {@link GenomeWindow} * @return a {@link GeneList} * @throws IOException * @throws ParserConfigurationException * @throws SAXException * @throws ExecutionException * @throws InterruptedException */ public GeneList getGeneList(DataSource dataSource, DASType dasType, GenomeWindow genomeWindow) throws IOException, ParserConfigurationException, SAXException, InterruptedException, ExecutionException { List<EntryPoint> entryPointList = getEntryPointList(dataSource); List<ListView<Gene>> resultList = new ArrayList<ListView<Gene>>(); Chromosome currentChromo = genomeWindow.getChromosome(); EntryPoint currentEntryPoint = findEntryPoint(entryPointList, currentChromo); if (currentEntryPoint != null) { URL queryUrl = generateQuery(dataSource, currentEntryPoint, dasType, genomeWindow); URLConnection connection = queryUrl.openConnection(); connection.setUseCaches(true); SAXParserFactory parserFactory = SAXParserFactory.newInstance(); parserFactory.setValidating(true); SAXParser parser = parserFactory.newSAXParser(); GeneHandler gh = new GeneHandler(currentChromo); // if the current project is a muti genome project we set the // name of the genome that was used for the mapping of the data if (ProjectManager.getInstance().isMultiGenomeProject()) { gh.setGenomeName(genomeName); gh.setAlleleType(alleleType); } parser.parse(connection.getInputStream(), gh); ListView<Gene> currentGeneList = gh.getGeneList(); resultList.add(currentGeneList); } else { resultList.add(null); } return new SimpleGeneList(resultList, null, null); } /** * @return the name of the genome on which the data were mapped. For multi-genome project only */ public String getGenomeName() { return genomeName; } /** * Retrieves a list of ScoredChromosomeWindow from a specified Data Source and a specified DAS Type * @param dataSource a {@link DataSource} * @param dasType a {@link DASType} * @return a {@link SCWList} * @throws IOException * @throws ParserConfigurationException * @throws SAXException * @throws ExecutionException * @throws InterruptedException */ public SCWList getSCWList(DataSource dataSource, DASType dasType) throws IOException, ParserConfigurationException, SAXException, InterruptedException, ExecutionException { List<EntryPoint> entryPointList = getEntryPointList(dataSource); List<ListView<ScoredChromosomeWindow>> resultList = new ArrayList<ListView<ScoredChromosomeWindow>>(); for (Chromosome currentChromo: projectChromosomes) { EntryPoint currentEntryPoint = findEntryPoint(entryPointList, currentChromo); // if we found a chromosome retrieve the data and // we create a genelist for this chromosome if (currentEntryPoint != null) { URL queryUrl = generateQuery(dataSource, currentEntryPoint, dasType); URLConnection connection = queryUrl.openConnection(); connection.setUseCaches(true); SAXParserFactory parserFactory = SAXParserFactory.newInstance(); parserFactory.setValidating(true); SAXParser parser = parserFactory.newSAXParser(); SCWHandler scwh = new SCWHandler(currentChromo); // if the current project is a muti genome project we set the // name of the genome that was used for the mapping of the data if (ProjectManager.getInstance().isMultiGenomeProject()) { scwh.setGenomeName(genomeName); scwh.setAlleleType(alleleType); } parser.parse(connection.getInputStream(), scwh); ListView<ScoredChromosomeWindow> currentSCWList = scwh.getScoreChromosomeWindowList(); resultList.add(currentSCWList); } resultList.add(null); } return new SimpleSCWList(resultList); } /** * Retrieves a list of ScoredChromosomeWindow from a specified Data Source and a specified DAS Type and a specified Data Range * @param dataSource a {@link DataSource} * @param dasType a {@link DASType} * @param genomeWindow a {@link GenomeWindow} * @return a {@link SCWList} * @throws IOException * @throws ParserConfigurationException * @throws SAXException * @throws ExecutionException * @throws InterruptedException */ public SCWList getSCWList(DataSource dataSource, DASType dasType, GenomeWindow genomeWindow) throws IOException, ParserConfigurationException, SAXException, InterruptedException, ExecutionException { List<EntryPoint> entryPointList = getEntryPointList(dataSource); List<ListView<ScoredChromosomeWindow>> resultList = new ArrayList<ListView<ScoredChromosomeWindow>>(); Chromosome currentChromo = genomeWindow.getChromosome(); EntryPoint currentEntryPoint = findEntryPoint(entryPointList, currentChromo); // if we found a chromosome retrieve the data and // we create a genelist for this chromosome if (currentEntryPoint != null) { URL queryUrl = generateQuery(dataSource, currentEntryPoint, dasType, genomeWindow); URLConnection connection = queryUrl.openConnection(); connection.setUseCaches(true); SAXParserFactory parserFactory = SAXParserFactory.newInstance(); parserFactory.setValidating(true); SAXParser parser = parserFactory.newSAXParser(); SCWHandler scwh = new SCWHandler(currentChromo); // if the current project is a muti genome project we set the // name of the genome that was used for the mapping of the data if (ProjectManager.getInstance().isMultiGenomeProject()) { scwh.setGenomeName(genomeName); scwh.setAlleleType(alleleType); } parser.parse(connection.getInputStream(), scwh); ListView<ScoredChromosomeWindow> currentSCWList = scwh.getScoreChromosomeWindowList(); resultList.add(currentSCWList); } else { resultList.add(null); } // Check if the list is scored return new SimpleSCWList(resultList); } /** * @param alleleType the alleleType to set */ public void setAlleleType(AlleleType alleleType) { this.alleleType = alleleType; } /** * @param genomeName for multi-genome project only. Name of the genome on which the data were mapped */ public void setGenomeName(String genomeName) { this.genomeName = genomeName; } // public static void main(String[] args) { // try { // long startTime = System.currentTimeMillis(); // DASConnector dasc = new DASConnector("http://genome.ucsc.edu/cgi-bin/das/"); // //DASConnector dasc = new DASConnector("http://www.ensembl.org/das/"); // List<DataSource> dsList = dasc.getDataSourceList(); // DataSource dataSource = dsList.get(0); // System.out.println(dataSource.getID()); // List<DASType> dasTypeList = dasc.getDASTypeList(dataSource); // DASType dasType = dasTypeList.get(39); // System.out.println(dasType.getID()); // //ScoredChromosomeWindowListInterface scwList = dasc.getSCWList(ChromosomeManager.getInstance(), dataSource, dasType); // GeneList geneList = dasc.getGeneList(ChromosomeManager.getInstance(), dataSource, dasType); // GeneListAsBedWriter glabw = new GeneListAsBedWriter(ChromosomeManager.getInstance(), new File("testDAS.bed"), geneList, "test"); // glabw.write(); // int length = (int)((System.currentTimeMillis() - startTime) / 1000l); // System.out.println(length); // } catch (Exception e) { // e.printStackTrace(); // } // } }