/* Copyright (C) 2003 EBI, GRL This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.ensembl.mart.lib; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; /** * Object for getting DNA Seqeuence Strings * from the Mart sgp_chunks table. * * @author <a href="mailto:dlondon@ebi.ac.uk">Darin London</a> * @author <a href="mailto:craig@ebi.ac.uk">Craig Melsopp</a> */ public class DNAAdaptor { private Logger logger = Logger.getLogger(DNAAdaptor.class.getName()); private Connection conn; private int chunkSize = 100000; // Size of dna chunks in sgp_chunks table //some prepared statements for later use private PreparedStatement specStmt; private PreparedStatement specSQLFull; private PreparedStatement specSQLSub; /** * DNAAdaptors require a database connection to get sequence from the mart database. * * @param Connection */ public DNAAdaptor(SequenceDescription seqd) throws SequenceException { String schema, dnaTable, startField, chrField, seqField; schema = seqd.getRefDataSource().getSchema(); String[] seqInfo = seqd.getSeqInfo().split("\\,"); //table,chr,start,sequence,chunkSize dnaTable = schema+"."+seqInfo[0]; chrField = seqInfo[1]; startField = seqInfo[2]; seqField = seqInfo[3]; chunkSize = Integer.parseInt( seqInfo[4] ); try { conn = seqd.getRefDataSource().getConnection(); String sqlFull = "select " + seqField + " from " + dnaTable + " where " + startField + " = ? and " + chrField + " = ?"; String sqlSub = "select substring(" + seqField + ", ?, ?) from " + dnaTable + " where " + startField + " = ? and " + chrField + " = ?"; specSQLFull = conn.prepareStatement(sqlFull); specSQLSub = conn.prepareStatement(sqlSub); } catch (SQLException e) { throw new SequenceException("Could not initialize DNAAdaptor Species Statements: " + e.getMessage(), e); } } private byte[] fetchFullChunk(String chr, int start) throws SequenceException { try { PreparedStatement ps = specSQLFull; ps.setInt(1, start); ps.setString(2, chr); ResultSet rs = ps.executeQuery(); rs.next(); byte[] ret = rs.getBytes(1); rs.close(); return ret; } catch (SQLException e) { throw new SequenceException("Could not fetch full sequence chunk " + e.getMessage(), e); } } private byte[] fetchChunkSubstring(String chr, int start, int chunkStart, int length) throws SequenceException { try { int coord = start - chunkStart + 1; PreparedStatement ps = specSQLSub; ps.setInt(1, coord); ps.setInt(2, length); ps.setInt(3, chunkStart); ps.setString(4, chr); ResultSet rs = ps.executeQuery(); rs.next(); byte[] ret = rs.getBytes(1); rs.close(); return ret; } catch (SQLException e) { throw new SequenceException("Could not fetch chunk substring " + e.getMessage(), e); } } private byte[] fetchSequence(String chr, int start, int length) throws SequenceException { int chunkStart = start - ( ( start - 1 ) % chunkSize ); if (start == chunkStart && length == chunkSize) return fetchFullChunk(chr, chunkStart); else return fetchChunkSubstring(chr, start, chunkStart, length); } private byte[] fetchResidualSequence(String chr, int start, int length, byte[] initialSeq) throws SequenceException { List bytes = new ArrayList(); bytes.add(initialSeq); int currentLength = initialSeq.length; int currentStart = start + currentLength; while (currentLength < length) { int residual = length - currentLength; byte[] currentBytes = fetchSequence(chr, currentStart, residual); if (currentBytes.length < 1) break; bytes.add(currentBytes); currentLength += currentBytes.length; currentStart = start + currentLength; } //iterate through bytes to fill sequence byte[] byte[] sequence = new byte[currentLength]; int nextPos = 0; for (int i = 0, n = bytes.size(); i < n; i++) { byte[] thisChunk = (byte[]) bytes.get(i); System.arraycopy(thisChunk, 0, sequence, nextPos, thisChunk.length); nextPos += thisChunk.length; } bytes = null; return sequence; } /** * Gets the Sequence for a given species, chr, start and end. * Checks to see if there is cached sequence applicable for * this query, and slices that sequence for its results. * * @return String sequence */ public byte[] getSequence(String chr, int start, int end) throws SequenceException { int len = (end - start) + 1; byte[] retBytes = fetchSequence(chr, start, len); if (retBytes.length < 1) { if (logger.isLoggable(Level.INFO)) logger.info("No Sequence Returned for request: chromosome = " + chr + ", start = " + start + " end = " + end + "\n"); return Npad(len); } if (retBytes.length < len) retBytes = fetchResidualSequence(chr, start, len, retBytes); //user may ask for more sequence than is available, return as much as possible if (retBytes.length < len && logger.isLoggable(Level.INFO)) logger.info("Warning, not enough sequence to satisfy request: requested " + len + " returning " + retBytes.length + "\n"); return retBytes; } public void close() { try { specSQLFull.close(); specSQLSub.close(); } catch (SQLException e) { //ignore } DetailedDataSource.close(conn); } /** * returns a byte[] of "N"s of a given length * @param length * @return */ private byte[] Npad(int length) { byte[] nseq = new byte[length]; for (int i = 0; i < length; i++) nseq[i] = 'N'; return nseq; } }