/*
 * Copyright (C) 2003 EBI, GRL
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.ensembl.mart.lib;

import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.ensembl.mart.util.FormattedSequencePrintStream;

/**
 * Base class for QueryRunner implementations that return sequence data.
 * Handles batched execution of the underlying SQL query (with a MySQL-specific
 * and a generic strategy), splitting of oversized IDListFilters into
 * sub-queries, and delegation of per-record processing and sequence writing to
 * the concrete subclass via {@link #updateQuery()},
 * {@link #processResultSet(Connection, ResultSet)} and {@link SeqWriter}.
 *
 * @author <a href="mailto:dlondon@ebi.ac.uk">Darin London</a>
 * @author <a href="mailto:craig@ebi.ac.uk">Craig Melsopp</a>
 */
public abstract class BaseSeqQueryRunner implements QueryRunner {

  protected int maxColumnLen = 80;

  // Batching numbers: after each batch, batchLength is multiplied alternately
  // by 5 and 2 (1000 -> 5000 -> 10000 -> 50000 ...) until it reaches
  // maxBatchLimit, so later batches fetch progressively more rows.
  protected final int[] batchModifiers = { 5, 2 };
  protected int modIter = 0; // index into batchModifiers, alternates 0/1, start at 0
  protected int batchLength = 1000; // current batch size, grows geometrically
  private final int maxBatchLimit = 750000; // cap on batchLength growth

  // Big list batching: IDListFilters with more identifiers than listSizeMax
  // are split into sub-queries of at most listSizeMax ids each.
  private final int listSizeMax = 1000;
  private final int maxBigListCount = 1; // at most one oversized list filter per query

  protected String separator;

  private final Logger logger = Logger.getLogger(BaseSeqQueryRunner.class.getName());

  protected Query query = null;
  protected Attribute[] attributes = null;
  protected Filter[] filters = null;
  protected FormatSpec format = null;
  protected FormattedSequencePrintStream osr;
  protected SeqWriter seqWriter;

  protected int totalRows = 0;
  protected int totalRowsThisExecute = 0;
  protected int resultSetRowsProcessed = 0; // will count rows processed for a given ResultSet batch
  protected int lastID = -1;
  protected int lastIDRowsProcessed = 0; // will allow process to skip rows already processed in previous batch, for a given ID

  protected TreeMap iDs = new TreeMap(); // holds each objects information, in order
  protected List fields = new ArrayList(); // holds unique list of resultset description fields from the query

  // Used for colating required fields; set by the subclass in updateQuery().
  protected String queryID, qualifiedQueryID;
  protected String coordStart, coordEnd;
  protected String rankField;
  protected String chrField;
  protected String strandField;

  // need to know these indexes specifically; computed from ResultSetMetaData
  // by the subclass in processResultSet().
  protected int queryIDindex = -1;
  protected int rankIndex = -1;
  protected int startIndex = -1;
  protected int endIndex = -1;
  protected int chromIndex = -1;
  protected int strandIndex = -1;
  protected List otherIndices = new ArrayList();

  protected DNAAdaptor dna;

  /**
   * Constructs a runner over a defensive copy of the given Query, so later
   * modifications (e.g. in updateQuery) do not affect the caller's object.
   *
   * @param query the query to execute
   */
  public BaseSeqQueryRunner(Query query) {
    this.query = new Query(query);
  }

  /**
   * This method should set the required variables queryID, coordStart, and
   * coordEnd to the values necessary for the type of sequence being processed,
   * then add any other displayID fields required for output, etc. to the
   * displayIDs List, and finally update the Query object with attributes that
   * are necessary to get the sequence data.
   */
  protected abstract void updateQuery();

  /*
   * (non-Javadoc)
   * @see org.ensembl.mart.lib.QueryRunner#execute(int)
   */
  public void execute(int limit) throws SequenceException, InvalidQueryException {
    execute(limit, false);
  }

  /**
   * Skips over rows at the start of a new ResultSet batch that were already
   * processed for lastID in the previous batch (the generic strategy re-issues
   * the query with "queryID >= lastID", so the first rows repeat).
   *
   * @param rs freshly executed ResultSet, positioned before the first row
   * @return the same ResultSet, advanced past the redundant rows
   * @throws SQLException if cursor advancement fails
   */
  protected ResultSet skipNewBatchRedundantRecords(ResultSet rs) throws SQLException {
    if (lastID > -1) {
      //If lastID > -1, we know that there are 1 or more rows to skip before beginning to process again
      while ((resultSetRowsProcessed < lastIDRowsProcessed) && rs.next()) {
        //skip through rows already processed for a the last id, incrementing only resultSetRowsprocessed
        //This will only occur at the beginning of a new ResultSet batch
        resultSetRowsProcessed++;
      }
    }
    return rs;
  }

  /**
   * This Method should first calculate the indices of the various fields that
   * it requires, using the ResultSet Object ResultSetMetaData. It should then
   * iterate through each of the results in the ResultSet, processing them.
   * While processing, if it should encounter a new keyID, it should write out
   * the results from the lastID, and reset lastIDRowsProcessed to zero. It
   * should end this loop by incrementing the totalRows, resultSetRowsProcessed
   * and lastIDRowsProcessed integers, and setting the lastID to the current
   * keyID.intValue.
   *
   * @param conn
   * @param rs
   * @throws IOException
   * @throws SQLException
   */
  protected abstract void processResultSet(Connection conn, ResultSet rs) throws IOException, SQLException;

  /**
   * Writes the data accumulated for the final keyID, which the batch loop
   * never flushes because no subsequent keyID is encountered.
   *
   * @param conn connection used by the SeqWriter to fetch sequence
   * @throws SequenceException if the write fails
   */
  protected void writeLastEntry(Connection conn) throws SequenceException {
    // write the last transcripts data, if present
    if (lastID > -1)
      seqWriter.writeSequences(new Integer(lastID), conn);
  }

  /**
   * Strategy object implemented by subclasses to write out the sequence data
   * collected for one keyID.
   */
  protected abstract class SeqWriter {
    abstract void writeSequences(Integer tranID, Connection conn) throws SequenceException;
  }

  /*
   * (non-Javadoc)
   * @see org.ensembl.mart.lib.QueryRunner#execute(int, boolean)
   */
  public void execute(int hardLimit, boolean isSubQuery) throws SequenceException, InvalidQueryException {
    if (isSubQuery)
      throw new SequenceException("SubQuerys cannot return sequences\n");

    //Some implementations do not need DNA adaptors, so they will have null SeqInfo
    if (query.getSequenceDescription().getSeqInfo() != null && query.getSequenceDescription().getSeqInfo().length() > 0)
      dna = new DNAAdaptor(query.getSequenceDescription());

    updateQuery();

    // Look for a single IDListFilter whose identifier list exceeds
    // listSizeMax; such a filter must be split into sub-queries.
    Filter[] filters = query.getFilters();
    Filter bigListFilter = null;
    String[] biglist = null;
    int numBigLists = 0;

    for (int i = 0, n = filters.length; i < n; i++) {
      Filter filter = filters[i];
      if (filter instanceof IDListFilter) {
        if (((IDListFilter) filter).getIdentifiers().length > listSizeMax) {
          // FIX: was "numBigLists > maxBigListCount", which let a second
          // oversized list silently replace the first before the third one
          // triggered the error; reject as soon as the limit would be exceeded.
          if (numBigLists >= maxBigListCount)
            throw new InvalidQueryException("Too many in list filters attached, only one per query supported.\n");

          bigListFilter = filter;
          biglist = ((IDListFilter) filter).getIdentifiers();
          numBigLists++;
        }
      }
    }

    if (numBigLists > 0) {
      // Replay the query once per sub-batch of listSizeMax identifiers,
      // stopping early once hardLimit rows have been returned.
      boolean moreRows = true;
      String[] idBatch = new String[listSizeMax];
      int batchIter = 0;

      for (int i = 0, n = biglist.length; moreRows && i < n; i++) {
        String element = biglist[i];

        if ((i > 0) && ((i % listSizeMax) == 0)) {
          // idBatch is full: run it as its own query, then start a new batch.
          Query newQuery = new Query(query);
          newQuery.removeFilter(bigListFilter);

          IDListFilter newFilter =
            new IDListFilter(bigListFilter.getField(), bigListFilter.getTableConstraint(), bigListFilter.getKey(), idBatch);
          newQuery.addFilter(newFilter);

          executeQuery(newQuery, hardLimit);

          if (hardLimit > 0)
            moreRows = totalRows < hardLimit;

          idBatch = new String[listSizeMax];
          batchIter = 0;
        }

        idBatch[batchIter] = element;
        batchIter++;
      }

      //last batch is either empty, or less than idBatch.length
      if (moreRows && idBatch[0] != null) {
        // Compact the partially-filled batch down to its non-null prefix.
        List lastBatch = new ArrayList();
        for (int i = 0, n = idBatch.length; i < n; i++) {
          String element = idBatch[i];
          if (element != null)
            lastBatch.add(element);
        }

        String[] lbatch = new String[lastBatch.size()];
        lastBatch.toArray(lbatch);

        Query newQuery = new Query(query);
        newQuery.removeFilter(bigListFilter);

        IDListFilter newFilter =
          new IDListFilter(bigListFilter.getField(), bigListFilter.getTableConstraint(), bigListFilter.getKey(), lbatch);
        newQuery.addFilter(newFilter);

        executeQuery(newQuery, hardLimit);
      }
    } else {
      executeQuery(query, hardLimit);
    }

    if (dna != null)
      dna.close();
  }

  /**
   * Dispatches to the MySQL-specific or generic execution strategy based on
   * the DataSource's database type.
   *
   * @param curQuery  query (possibly a big-list sub-query) to run
   * @param hardLimit maximum total rows to return; 0 or negative means no limit
   * @throws SequenceException     on sequence-processing failure
   * @throws InvalidQueryException on SQL failure
   */
  protected void executeQuery(Query curQuery, int hardLimit) throws SequenceException, InvalidQueryException {
    DetailedDataSource ds = curQuery.getDataSource();
    if (ds == null)
      throw new RuntimeException("curQuery.DataSource is null");

    if (ds.getDatabaseType().equals("mysql")) {
      //mySQL solution
      executeQueryMysql(ds, curQuery, hardLimit);
    } else {
      //generic solution
      executeQueryGeneric(ds, curQuery, hardLimit);
    }
  }

  /**
   * MySQL strategy: pages through results with "LIMIT offset,count", growing
   * the batch size geometrically between iterations.
   *
   * @param ds        datasource supplying the connection
   * @param curQuery  query to run
   * @param hardLimit maximum total rows; 0 or negative means no limit
   * @throws SequenceException     on sequence-processing failure
   * @throws InvalidQueryException on SQL failure (message includes the SQL)
   */
  protected void executeQueryMysql(DetailedDataSource ds, Query curQuery, int hardLimit)
    throws SequenceException, InvalidQueryException {

    boolean moreRows = true;
    boolean userLimit = false;
    totalRowsThisExecute = 0;

    attributes = curQuery.getAttributes();
    filters = curQuery.getFilters();

    Connection conn = null;
    String sql = null;
    try {
      conn = ds.getConnection();
      QueryCompiler csql = new QueryCompiler(curQuery, ds);
      String sqlbase = csql.toSQL();

      while (moreRows) {
        sql = sqlbase;
        sql += " order by " + qualifiedQueryID;

        int maxRows = 0;
        if (hardLimit > 0) {
          userLimit = true;
          // NOTE(review): this fetches a single batch of at most batchLength
          // rows when a hard limit is set; a hardLimit larger than batchLength
          // appears to be truncated to one batch -- confirm intended.
          maxRows = Math.min(batchLength, hardLimit - totalRows);
          moreRows = false;
        } else
          maxRows = batchLength;

        sql += " LIMIT " + totalRowsThisExecute + "," + maxRows;

        if (logger.isLoggable(Level.INFO))
          logger.info("SQL : " + sql + "\n");

        PreparedStatement ps = conn.prepareStatement(sql);
        int p = 1;
        for (int i = 0; i < filters.length; ++i) {
          Filter f = filters[i];
          String value = f.getValue();
          if (value != null) {
            logger.info("SQL (prepared statement value) : " + p + " = " + value);
            ps.setString(p++, value);
          }
        }

        ResultSet rs = ps.executeQuery();
        resultSetRowsProcessed = 0;
        processResultSet(conn, rs);

        // on the odd chance that the last result set is equal in size to the batchLength, it will need to make an extra attempt.
        if ((!userLimit) && (resultSetRowsProcessed < batchLength))
          moreRows = false;

        if (batchLength < maxBatchLimit) {
          batchLength *= batchModifiers[modIter];
          modIter = (modIter == 0) ? 1 : 0;
        }

        rs.close();
        ps.close(); // FIX: statement was previously leaked on every iteration
      }

      writeLastEntry(conn);
      // connection is closed in the finally block below
    } catch (IOException e) {
      throw new SequenceException(e);
    } catch (SQLException e) {
      throw new InvalidQueryException(e + " :" + sql);
    } finally {
      DetailedDataSource.close(conn);
    }
  }

  /**
   * Generic (non-MySQL) strategy: pages through results with
   * Statement.setMaxRows plus a "queryID >= lastID" restart predicate, using
   * skipNewBatchRedundantRecords to drop rows repeated across batches.
   *
   * @param ds        datasource used to compile and run the SQL
   * @param curQuery  query to run
   * @param hardLimit maximum total rows; 0 or negative means no limit
   * @throws SequenceException     on sequence-processing failure
   * @throws InvalidQueryException on SQL failure (message includes the SQL)
   */
  protected void executeQueryGeneric(DetailedDataSource ds, Query curQuery, int hardLimit)
    throws SequenceException, InvalidQueryException {

    boolean moreRows = true;
    boolean userLimit = false;

    attributes = curQuery.getAttributes();
    filters = curQuery.getFilters();

    Connection conn = null;
    String sql = null;
    try {
      // FIX: use the ds passed in (same source, already null-checked by
      // executeQuery) rather than re-fetching it from curQuery, matching
      // executeQueryMysql.
      conn = ds.getConnection();
      QueryCompiler csql = new QueryCompiler(curQuery, ds);
      String sqlbase = csql.toSQL();

      while (moreRows) {
        sql = sqlbase;

        // Restart each batch at the last processed key; redundant leading rows
        // are skipped by skipNewBatchRedundantRecords below.
        if (lastID > -1) {
          if (sqlbase.indexOf("WHERE") >= 0)
            sql += " and " + qualifiedQueryID + " >= " + lastID;
          else
            sql += " WHERE " + qualifiedQueryID + " >= " + lastID;
        }

        sql += " order by " + qualifiedQueryID;

        if (logger.isLoggable(Level.INFO)) {
          logger.info("SQL : " + sql + "\n");
          logger.info("batchLength : " + batchLength + "\n");
        }

        PreparedStatement ps = conn.prepareStatement(sql);
        if (hardLimit > 0) {
          userLimit = true;
          ps.setMaxRows(hardLimit);
          moreRows = false;
        } else
          ps.setMaxRows(batchLength);

        int p = 1;
        for (int i = 0; i < filters.length; ++i) {
          Filter f = filters[i];
          String value = f.getValue();
          if (value != null) {
            logger.info("SQL (prepared statement value) : " + p + " = " + value);
            ps.setString(p++, value);
          }
        }

        ResultSet rs = ps.executeQuery();
        resultSetRowsProcessed = 0;
        processResultSet(conn, skipNewBatchRedundantRecords(rs));

        // on the odd chance that the last result set is equal in size to the batchLength, it will need to make an extra attempt.
        if ((!userLimit) && (resultSetRowsProcessed < batchLength))
          moreRows = false;

        if (batchLength < maxBatchLimit) {
          batchLength *= batchModifiers[modIter];
          modIter = (modIter == 0) ? 1 : 0;
        }

        rs.close();
        ps.close(); // FIX: statement was previously leaked on every iteration
      }

      writeLastEntry(conn);
      // connection is closed in the finally block below
    } catch (IOException e) {
      throw new SequenceException(e);
    } catch (SQLException e) {
      throw new InvalidQueryException(e + " :" + sql);
    } finally {
      DetailedDataSource.close(conn);
    }
  }
}