/* * #%L * gitools-biomart * %% * Copyright (C) 2013 Universitat Pompeu Fabra - Biomedical Genomics group * %% * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program. If not, see * <http://www.gnu.org/licenses/gpl-3.0.html>. * #L% */ package org.gitools.datasources.biomart.restful; import org.gitools.api.analysis.IProgressMonitor; import org.gitools.api.persistence.FileFormat; import org.gitools.datasources.biomart.BiomartService; import org.gitools.datasources.biomart.BiomartServiceException; import org.gitools.datasources.biomart.queryhandler.BiomartQueryHandler; import org.gitools.datasources.biomart.queryhandler.TsvFileQueryHandler; import org.gitools.datasources.biomart.restful.model.*; import org.gitools.datasources.biomart.settings.BiomartSource; import org.gitools.ui.platform.settings.Settings; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; import java.io.*; import java.net.*; import java.util.ArrayList; import java.util.List; public class BiomartRestfulService implements BiomartService { private static final String FORMAT_TSV = "TSV"; private static final String FORMAT_TSV_GZ = "GZ"; private final FileFormat[] supportedFormats = new FileFormat[]{new FileFormat("Tab Separated Fields", "tsv", true, false), new FileFormat("Tab Separated Fields GZip compressed", "tsv.gz", true, false)}; private final BiomartSource source; private final String restUrl; public BiomartRestfulService(BiomartSource source) throws BiomartServiceException { this.source = source; restUrl = composeUrl(source.getHost(), source.getPort(), source.getRestPath()); //System.out.println(">>>>> " + restUrl); } /** * Method for building full url string from a host, port, and a destPath */ private String composeUrl(String host, String port, String destPath) { StringBuilder sb = new StringBuilder(); if (host != null && !host.isEmpty()) { sb.append("http://").append(host); } if (port != null && !port.isEmpty()) { sb.append(':').append(port); } if (destPath != null && !destPath.isEmpty()) { if (!destPath.startsWith("/")) { sb.append('/'); } sb.append(destPath); } return sb.toString(); } private <T> T xmlGET(String url, Class<T> responseClass) throws IOException, JAXBException { URL u = new URL(url); HttpURLConnection conn; conn = getHttpURLConnection(u); conn.setRequestMethod("GET"); conn.connect(); JAXBContext jc = JAXBContext.newInstance(responseClass); Unmarshaller unm = jc.createUnmarshaller(); T response = (T) unm.unmarshal(conn.getInputStream()); return (T) response; } private HttpURLConnection getHttpURLConnection(URL u) throws IOException { HttpURLConnection conn; if (Settings.get().isProxyEnabled()) { Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(Settings.get().getProxyHost(), Settings.get().getProxyPort())); conn = (HttpURLConnection) u.openConnection(proxy); } else { conn = (HttpURLConnection) u.openConnection(); } return conn; } /** * Given a Biomart service and a data set produces a datasetConfig * * @param d * @return * @throws MalformedURLException * @throws IOException * @throws JAXBException */ @Override public DatasetConfig getConfiguration(DatasetInfo d) throws BiomartServiceException { final String urlString = restUrl + "?type=configuration&dataset=" + d.getName() + "&virtualSchema=" + d.getInterface(); DatasetConfig ds = null; try { ds = xmlGET(urlString, DatasetConfig.class); } catch (Throwable cause) { throw new BiomartServiceException(cause); } return ds; } @Override public List<MartLocation> getRegistry() throws BiomartServiceException { MartRegistry reg = null; final String urlString = restUrl + "?type=registry"; try { reg = xmlGET(urlString, MartRegistry.class); } catch (Throwable cause) { throw new BiomartServiceException(cause); } if (reg == null) { return new ArrayList<MartLocation>(0); } return reg.getLocations(); } @Override public List<DatasetInfo> getDatasets(MartLocation mart) throws BiomartServiceException { final String urlString = restUrl + "?type=datasets&mart=" + mart.getName(); List<DatasetInfo> ds = new ArrayList<DatasetInfo>(); try { URL url = new URL(urlString); HttpURLConnection conn = getHttpURLConnection(url); conn.setRequestMethod("GET"); conn.connect(); InputStream is = conn.getInputStream(); BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); String s = null; DatasetInfo d = null; while ((s = reader.readLine()) != null) { if (!s.equals(" ") && !s.equals("\n")) { String f[] = s.split("\t"); d = new DatasetInfo(); d.setType(f[0]); d.setName(f[1]); d.setDisplayName(f[2]); d.setVisible(Integer.valueOf(f[3])); d.setInterface(f[7]); ds.add(d); } } } catch (Throwable cause) { throw new BiomartServiceException(cause); } return ds; } @Override public List<AttributePage> getAttributes(MartLocation mart, DatasetInfo dataset) throws BiomartServiceException { DatasetConfig dc = getConfiguration(dataset); return dc.getAttributePages(); } @Override public List<FilterPage> getFilters(MartLocation mart, DatasetInfo dataset) throws BiomartServiceException { DatasetConfig dc = getConfiguration(dataset); if (dc.getFilterPages() != null && dc.getFilterPages().size() > 0) { return dc.getFilterPages(); } else { return new ArrayList<FilterPage>(); } } //FIXME Use JAXB !!! //FIXME review filter xml parsing private String createQueryXml(Query query, String format, boolean encoded) { StringWriter sw = new StringWriter(); sw.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE Query>"); sw.append("<Query"); sw.append(" virtualSchemaName=\"").append(query.getVirtualSchemaName()).append('"'); sw.append(" header=\"").append("" + query.getHeader()).append('"'); sw.append(" uniqueRows=\"").append("" + query.getUniqueRows()).append('"'); sw.append(" count=\"").append("" + query.getCount()).append('"'); sw.append(" formatter=\"").append(format).append('"'); sw.append(" datasetConfigVersion=\"0.7\">"); for (Dataset ds : query.getDatasets()) { sw.append("<Dataset"); sw.append(" name=\"").append(ds.getName()).append('"'); sw.append(" interface=\"default\">"); for (Attribute attr : ds.getAttribute()) sw.append("<Attribute name=\"").append(attr.getName()).append("\" />"); for (Filter flt : ds.getFilter()) { if (flt.getValue() != null && !flt.getValue().equals("")) { sw.append("<Filter name=\"").append(flt.getName()).append("\" "); if (flt.getRadio()) { sw.append("excluded=\"").append(flt.getValue()).append("\""); } else { sw.append("value=\"").append(flt.getValue()).append("\""); } sw.append(" />"); } } sw.append("</Dataset>"); } sw.append("</Query>"); //System.out.println(sw.toString()); if (encoded) { try { return URLEncoder.encode(sw.toString(), "UTF-8"); } catch (UnsupportedEncodingException ex) { return sw.toString(); } } else { return sw.toString(); } } @Override public InputStream queryAsStream(Query query, String format) throws BiomartServiceException { final String queryString = createQueryXml(query, format, true); final String urlString = restUrl + "?query=" + queryString; //System.out.println(">>> " + urlString); //System.out.println(createQueryXml(query, format, false)); try { URL url = new URL(urlString); HttpURLConnection conn = getHttpURLConnection(url); conn.setRequestMethod("GET"); conn.connect(); return conn.getInputStream(); } catch (Exception ex) { throw new BiomartServiceException("Error opening connection with Biomart service", ex); } } @Override public void queryModule(Query query, File file, String format, IProgressMonitor monitor) throws BiomartServiceException { BiomartQueryHandler tableWriter = null; if (format.equals(FORMAT_TSV) || format.equals(FORMAT_TSV_GZ)) { tableWriter = new TsvFileQueryHandler(file, format.equals(FORMAT_TSV_GZ)); } if (tableWriter == null) { throw new BiomartServiceException("Unrecognized format: " + format); } queryModule(query, tableWriter, monitor); if (monitor.isCancelled()) { file.delete(); } } @Override public void queryModule(Query query, BiomartQueryHandler writer, IProgressMonitor monitor) throws BiomartServiceException { InputStream in = queryAsStream(query, FORMAT_TSV); BufferedReader br = new BufferedReader(new InputStreamReader(in)); try { writer.begin(); } catch (Exception ex) { throw new BiomartServiceException(ex); } TimeCounter speedTimer = new TimeCounter(); long speedBytes = 0; try { String next = null; while ((next = br.readLine()) != null && !monitor.isCancelled()) { String[] fields = next.split("\t"); if (fields.length == 2 && !fields[0].isEmpty() && !fields[1].isEmpty()) { writer.line(fields); } speedBytes += next.length(); double seconds = speedTimer.getElapsedSeconds(); if (seconds >= 1.0) { double speed = (speedBytes / 1024.0) / seconds; monitor.info(String.format("%.1f Kb/s", speed)); speedBytes = 0; speedTimer.reset(); } } } catch (Exception ex) { throw new BiomartServiceException("Error parsing Biomart query results.", ex); } finally { writer.end(); } //log.info("queryModule: elapsed time " + time.toString()); } /** * query a table * * @param query * @param file * @param format * @param skipRowsWithEmptyValues whether skip or not rows having empty values * @param emptyValuesReplacement if empty values not skipped which value to use instead * @param monitor * @throws BiomartServiceException */ @Override public void queryTable(Query query, File file, String format, boolean skipRowsWithEmptyValues, String emptyValuesReplacement, IProgressMonitor monitor) throws BiomartServiceException { BiomartQueryHandler tableWriter = null; if (format.equals(FORMAT_TSV) || format.equals(FORMAT_TSV_GZ)) { tableWriter = new TsvFileQueryHandler(file, format.equals(FORMAT_TSV_GZ)); } if (tableWriter == null) { throw new BiomartServiceException("Unrecognized format: " + format); } queryTable(query, tableWriter, skipRowsWithEmptyValues, emptyValuesReplacement, monitor); if (monitor.isCancelled()) { file.delete(); } } @Override public void queryTable(Query query, BiomartQueryHandler writer, boolean skipRowsWithEmptyValues, String emptyValuesReplacement, IProgressMonitor monitor) throws BiomartServiceException { TimeCounter time = new TimeCounter(); InputStream in = queryAsStream(query, FORMAT_TSV); BufferedReader br = new BufferedReader(new InputStreamReader(in)); try { writer.begin(); } catch (Exception ex) { throw new BiomartServiceException(ex); } TimeCounter speedTimer = new TimeCounter(); long speedBytes = 0; try { String next = null; while ((next = br.readLine()) != null && !monitor.isCancelled()) { String[] fields = next.split("\t"); boolean hasEmptyValues = false; for (int i = 0; i < fields.length; i++) { hasEmptyValues |= fields[i].isEmpty(); if (fields[i].isEmpty()) { fields[i] = emptyValuesReplacement; } } speedBytes += next.length(); double seconds = speedTimer.getElapsedSeconds(); if (seconds >= 1.0) { double speed = (speedBytes / 1024.0) / seconds; monitor.info(String.format("%.1f Kb/s", speed)); speedBytes = 0; speedTimer.reset(); } if (!(skipRowsWithEmptyValues && hasEmptyValues)) { writer.line(fields); } } } catch (Exception ex) { throw new BiomartServiceException("Error parsing Biomart query results.", ex); } finally { writer.end(); } } }