package org.ensembl.mart.lib;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Implementation of the QueryRunner for executing a Query and
* generating Tabulated output.
* Tabulated output is separated by a field separator specified by
* a FormatSpec object. Any Query can generate tabulated output.
*
* @author <a href="mailto:dlondon@ebi.ac.uk">Darin London</a>
* @author <a href="mailto:craig@ebi.ac.uk">Craig Melsopp</a>
* @see Query
* @see FormatSpec
*/
public final class AttributeQueryRunner implements QueryRunner {

  /**
   * Constructs an AttributeQueryRunner object to execute a Query
   * and print tabulated output specified by the given FormatSpec.
   *
   * @param query - a Query Object
   * @param format - a FormatSpec object
   * @param os - OutputStream to which the tabulated output is printed
   */
  public AttributeQueryRunner(Query query, FormatSpec format, OutputStream os) {
    this.query = query;
    this.format = format;
    this.osr = new PrintStream(os, true); // autoflush true
  }

  /** Executes the query without subquery semantics. */
  public void execute(int hardLimit) throws SequenceException, InvalidQueryException {
    execute(hardLimit, false);
  }

  /**
   * Executes the query. If an IDListFilter with more than listSizeMax
   * identifiers is attached, it is split into batches of listSizeMax ids,
   * each run as a separate query, until all ids are consumed or the row
   * limit is reached.
   *
   * @param hardLimit maximum number of rows to return; values &lt;= 0 mean
   *   unlimited (capped at MAXTOTALROWS unless this is a subquery)
   * @param isSubQuery when true, every id batch is executed regardless of
   *   rows returned so far, so a subquery always yields all of its ids
   * @throws InvalidQueryException if more than one oversized IDListFilter is
   *   attached, or if query execution / output writing fails
   */
  public void execute(int hardLimit, boolean isSubQuery) throws SequenceException, InvalidQueryException {
    if (hardLimit > 0)
      hardLimit = Math.min(hardLimit, MAXTOTALROWS);
    else if (!isSubQuery)
      hardLimit = MAXTOTALROWS;

    // locate the (single) oversized IDListFilter, if any
    Filter[] filters = query.getFilters();
    Filter bigListFilter = null;
    String[] biglist = null;
    int numBigLists = 0;
    for (int i = 0, n = filters.length; i < n; i++) {
      Filter filter = filters[i];
      if (filter instanceof IDListFilter) {
        if (((IDListFilter) filter).getIdentifiers().length > listSizeMax) {
          // FIX: was (numBigLists > maxBigListCount), which let a second big
          // list silently replace the first instead of failing as documented
          if (numBigLists >= maxBigListCount)
            throw new InvalidQueryException("Too many in list filters attached, only one per query supported.\n");
          bigListFilter = filter;
          biglist = ((IDListFilter) filter).getIdentifiers();
          numBigLists++;
        }
      }
    }

    if (numBigLists > 0) {
      boolean moreRows = true;
      String[] idBatch = new String[listSizeMax];
      int batchIter = 0;

      for (int i = 0, n = biglist.length; moreRows && i < n; i++) {
        String element = biglist[i];

        // a full batch has been collected: run it, then start a fresh batch
        if ((i > 0) && ((i % listSizeMax) == 0)) {
          Query newQuery = new Query(query);
          newQuery.removeFilter(bigListFilter);

          IDListFilter newFilter =
            new IDListFilter(
              bigListFilter.getField(),
              bigListFilter.getTableConstraint(),
              bigListFilter.getKey(),
              idBatch);
          newQuery.addFilter(newFilter);

          executeQuery(newQuery, hardLimit);

          if (isSubQuery) {
            // get all ids for a subQuery
            moreRows = true;
          } else {
            // only execute batches until all are completed, or totalRows == hardLimit
            moreRows = totalRows < hardLimit;
          }

          idBatch = new String[listSizeMax];
          batchIter = 0;
        }

        idBatch[batchIter] = element;
        batchIter++;
      }

      // last batch is either empty, or less than idBatch.length
      if (moreRows && idBatch[0] != null) {
        List lastBatch = new ArrayList();
        for (int i = 0, n = idBatch.length; i < n; i++) {
          String element = idBatch[i];
          if (element != null)
            lastBatch.add(element);
        }

        String[] lbatch = new String[lastBatch.size()];
        lastBatch.toArray(lbatch);

        Query newQuery = new Query(query);
        newQuery.removeFilter(bigListFilter);

        IDListFilter newFilter =
          new IDListFilter(bigListFilter.getField(), bigListFilter.getTableConstraint(), bigListFilter.getKey(), lbatch);
        newQuery.addFilter(newFilter);

        executeQuery(newQuery, hardLimit);
      }
    } else {
      executeQuery(query, hardLimit);
    }
  }

  /**
   * Dispatches to the database-specific execution strategy: mysql and
   * postgres use LIMIT/OFFSET paging; everything else pages on the
   * primary key.
   *
   * @throws RuntimeException if curQuery has no DataSource attached
   */
  protected void executeQuery(Query curQuery, int hardLimit) throws SequenceException, InvalidQueryException {
    DetailedDataSource ds = curQuery.getDataSource();
    if (ds == null)
      throw new RuntimeException("curQuery.DataSource is null");

    if (ds.getDatabaseType().equals("mysql") || ds.getDatabaseType().equals("postgres")) {
      executeQueryPostgresMysql(ds, curQuery, hardLimit);
    } else {
      executeQueryGeneric(ds, curQuery, hardLimit);
    }
  }

  /**
   * Executes curQuery against databases without LIMIT/OFFSET paging by
   * batching on the lowest-level primary key: each batch restricts the SQL
   * to key &gt;= lastID and (unless the query sorts itself) orders by that key
   * so the next batch can resume where the previous one stopped. Rows
   * already emitted for lastID are skipped via skipNewBatchRedundantRecords.
   */
  protected void executeQueryGeneric(DetailedDataSource ds, Query curQuery, int hardLimit) throws SequenceException, InvalidQueryException {
    lastID = -1; // so nothing is skipped
    attributes = curQuery.getAttributes();
    filters = curQuery.getFilters();

    boolean moreRows = true;
    Connection conn = null;
    String sql = null;

    try {
      csql = new QueryCompiler(curQuery, ds);
      String sqlbase = csql.toSQLWithKey();
      String primaryKey = csql.getQualifiedLowestLevelKey();
      //queryID = csql.getPrimaryKey();
      queryID = csql.getLowestLevelKey();

      conn = ds.getConnection();

      while (moreRows) {
        StringBuffer sqlBuf = new StringBuffer(sqlbase);

        if (sqlbase.indexOf("WHERE") >= 0) {
          // splice the key restriction in just after "WHERE " (5 chars + space)
          String insert = primaryKey + " >= " + lastID + " AND ";
          sqlBuf.insert(sqlbase.indexOf("WHERE") + 6, insert);
        } else
          sqlBuf.append(" WHERE " + primaryKey + " >= " + lastID);

        if (!(curQuery.hasSort()))
          sqlBuf.append(" ORDER BY " + primaryKey);

        sql = sqlBuf.toString();

        int maxRows = 0;
        if (hardLimit > 0)
          maxRows = Math.min(batchLimit, hardLimit - totalRows);
        else
          maxRows = batchLimit;

        if (logger.isLoggable(Level.INFO)) {
          logger.info("SQL : " + sql);
        }

        PreparedStatement ps = conn.prepareStatement(sql);
        ps.setMaxRows(maxRows);

        // bind every non-null filter value in order
        int p = 1;
        for (int i = 0, n = filters.length; i < n; ++i) {
          Filter f = curQuery.getFilters()[i];
          String value = f.getValue();
          if (value != null) {
            logger.fine("SQL (prepared statement value) : " + p + " = " + value);
            ps.setString(p++, value);
          }
        }

        ResultSet rs = ps.executeQuery();
        resultSetRowsProcessed = 0;
        processResultSetGeneric(conn, skipNewBatchRedundantRecords(rs));

        // on the odd chance that the last result set is equal in size to the
        // batchLength, it will need to make an extra attempt
        if (resultSetRowsProcessed < batchLimit) {
          // this is the last batch - hence set moreRows to false
          moreRows = false;
        }

        // ramp the batch size up toward maxBatchLimit, doubling each pass
        if (batchLimit < maxBatchLimit) {
          batchLimit =
            (batchLimit * batchModifiers[modIter] < maxBatchLimit)
              ? batchLimit * batchModifiers[modIter]
              : maxBatchLimit;
          modIter = (modIter == 0) ? 1 : 0;
        }
        //else
        //batchLimit += linearIncrease;

        // FIX: close the ResultSet before its Statement; the original closed
        // the Statement first, which invalidates the dependent ResultSet
        rs.close();
        ps.close();
      }
    } catch (IOException e) {
      if (logger.isLoggable(Level.WARNING))
        logger.warning("Couldnt write to OutputStream\n" + e.getMessage());
      throw new InvalidQueryException(e);
    } catch (SQLException e) {
      if (logger.isLoggable(Level.WARNING))
        logger.warning(e.getMessage());
      throw new InvalidQueryException(e);
    } finally {
      DetailedDataSource.close(conn);
    }
  }

  /**
   * Executes curQuery against mysql or postgres, paging with the native
   * LIMIT/OFFSET syntax (mysql: "LIMIT offset,count"; postgres:
   * "LIMIT count OFFSET offset"), where the offset is the number of rows
   * already emitted by this execute call.
   */
  protected void executeQueryPostgresMysql(DetailedDataSource ds, Query curQuery, int hardLimit) throws SequenceException, InvalidQueryException {
    attributes = curQuery.getAttributes();
    filters = curQuery.getFilters();

    boolean moreRows = true;
    totalRowsThisExecute = 0;

    Connection conn = null;
    String sql = null;

    try {
      csql = new QueryCompiler(curQuery, ds);
      String sqlbase = csql.toSQL();

      conn = ds.getConnection();

      while (moreRows) {
        sql = sqlbase;

        int maxRows = 0;
        if (hardLimit > 0)
          maxRows = Math.min(batchLimit, hardLimit - totalRows);
        else
          maxRows = batchLimit;

        if (ds.getDatabaseType().equals("mysql")) {
          sql += " LIMIT " + totalRowsThisExecute + "," + maxRows;
        }
        if (ds.getDatabaseType().equals("postgres")) {
          sql += " LIMIT " + maxRows + " OFFSET " + totalRowsThisExecute;
        }

        if (logger.isLoggable(Level.INFO))
          logger.info("SQL : " + sql);

        PreparedStatement ps = conn.prepareStatement(sql);

        // bind every non-null filter value in order
        int p = 1;
        for (int i = 0, n = filters.length; i < n; ++i) {
          Filter f = curQuery.getFilters()[i];
          String value = f.getValue();
          if (value != null) {
            logger.fine("SQL (prepared statement value) : " + p + " = " + value);
            ps.setString(p++, value);
          }
        }

        ResultSet rs = ps.executeQuery();
        resultSetRowsProcessed = 0;
        processResultSetMysql(conn, rs);

        // on the odd chance that the last result set is equal in size to the
        // batchLength, it will need to make an extra attempt
        if (resultSetRowsProcessed < batchLimit) {
          // this is the last batch - hence set moreRows to false
          moreRows = false;
        }

        // ramp the batch size up toward maxBatchLimit, doubling each pass
        if (batchLimit < maxBatchLimit) {
          batchLimit =
            (batchLimit * batchModifiers[modIter] < maxBatchLimit) ? batchLimit * batchModifiers[modIter] : maxBatchLimit;
          modIter = (modIter == 0) ? 1 : 0;
        }
        //else
        //batchLimit += linearIncrease;

        rs.close();
        // FIX: the original never closed the PreparedStatement here, leaking
        // a statement handle on every batch iteration
        ps.close();
      }
    } catch (IOException e) {
      if (logger.isLoggable(Level.WARNING))
        logger.warning("Couldnt write to OutputStream\n" + e.getMessage());
      throw new InvalidQueryException(e);
    } catch (SQLException e) {
      if (logger.isLoggable(Level.WARNING))
        logger.warning(e.getMessage());
      throw new InvalidQueryException(e);
    } finally {
      DetailedDataSource.close(conn);
    }
  }

  /**
   * Advances rs past rows that were already processed for lastID in the
   * previous batch. Only relevant at the start of a new ResultSet batch in
   * the generic (key-paged) strategy, where the key &gt;= lastID restriction
   * re-fetches rows for the last id seen.
   *
   * @return the same ResultSet, positioned after any redundant rows
   */
  protected ResultSet skipNewBatchRedundantRecords(ResultSet rs) throws SQLException {
    if (lastID > -1) {
      //If lastID > -1, we know that there are 1 or more rows to skip before beginning to process again
      while ((resultSetRowsProcessed < lastIDRowsProcessed) && rs.next()) {
        //skip through rows already processed for a the last id, incrementing only resultSetRowsprocessed
        //This will only occur at the beginning of a new ResultSet batch
        resultSetRowsProcessed++;
      }
    }
    return rs;
  }

  /**
   * Prints every row of rs as separator-delimited columns (null columns are
   * printed as empty), updating the row counters used for LIMIT/OFFSET paging.
   *
   * @throws IOException if the underlying PrintStream reports an error
   */
  private void processResultSetMysql(Connection conn, ResultSet rs) throws IOException, SQLException {
    while (rs.next()) {
      for (int i = 1, nColumns = rs.getMetaData().getColumnCount(); i <= nColumns; ++i) {
        if (i > 1)
          osr.print(format.getSeparator());
        String v = rs.getString(i);
        if (v != null)
          osr.print(v);
      }
      osr.print("\n");

      if (osr.checkError())
        throw new IOException();

      totalRows++;
      totalRowsThisExecute++;
      resultSetRowsProcessed++;
    }
  }

  /**
   * Prints every row of rs, omitting the primary-key column that was added
   * only for batching, and tracks per-id row counts (lastID,
   * lastIDRowsProcessed) so the next batch can skip rows already emitted
   * for the last id.
   *
   * @throws SQLException if a single id yields more rows than the batching
   *   system can handle, or on any driver error
   * @throws IOException if the underlying PrintStream reports an error
   */
  private void processResultSetGeneric(Connection conn, ResultSet rs) throws IOException, SQLException {
    ResultSetMetaData rmeta = rs.getMetaData();
    int queryIDindex = 0;

    // find the column index of the batching key so it can be skipped on output
    for (int i = 1, nColumns = rmeta.getColumnCount(); i <= nColumns; ++i) {
      String column = rmeta.getColumnName(i);
      // FIX: equalsIgnoreCase replaces toLowerCase().equals(...) (locale-safe)
      if (column.equalsIgnoreCase(queryID))
        queryIDindex = i;
    }

    if ((lastIDRowsProcessed == maxBatchLimit) && rs.isLast()) {
      throw new SQLException("WARNING - MORE THAN 50000 ROWS FOR A SINGLE ID BREAKS THE CURRENT BATCHING SYSTEM");
    }

    while (rs.next()) {
      int currID = rs.getInt(queryIDindex);

      if (lastID > -1 && lastID != currID) {
        lastIDRowsProcessed = 0;
        //reset batchLimit to maxBatchLength if it has needed to creep up to finish the last ids results
        if (batchLimit > maxBatchLimit)
          batchLimit = maxBatchLimit;
      }

      for (int i = 1, nColumns = rs.getMetaData().getColumnCount(); i <= nColumns; ++i) {
        //skip the queryID
        if (i != queryIDindex) {
          if (i > 1)
            osr.print(format.getSeparator());
          String v = rs.getString(i);
          if (v != null)
            osr.print(v);
        }
      }
      osr.print("\n");

      if (osr.checkError())
        throw new IOException();

      lastID = currID;
      totalRows++;
      resultSetRowsProcessed++;
      lastIDRowsProcessed++;
    }
  }

  //batching
  private final int[] batchModifiers = { 2, 2 };
  private int modIter = 0; //start at 0
  private int batchLimit = 5000;
  private final int maxBatchLimit = 50000;
  // total number of rows execute will ever return
  private final int MAXTOTALROWS = 100000000;
  //allow batchLength to increase by this amount after maxBatchLength has been reached
  //this will result in slow response for queries where each id returns a resultset
  //larger than maxBatchLimit, but they will eventually finish
  //and the system will reset the limit back to the batch for each new id
  //this could, concievably, hit a memory limit, so test and tweak
  // private final int linearIncrease = 10;

  //big list batching
  private final int listSizeMax = 1000;
  private final int maxBigListCount = 1;

  private String queryID = null;            // unqualified name of the batching key column
  private int lastID = -1;                  // last batching-key value emitted; -1 means none
  private int totalRows = 0;                // rows emitted across all executeQuery calls
  private int totalRowsThisExecute = 0;     // rows emitted in the current mysql/postgres execute (the LIMIT offset)
  private int resultSetRowsProcessed = 0;   // will count rows processed for a given ResultSet batch
  private int lastIDRowsProcessed = 0;
  // will allow process to skip rows already processed in previous batch, for a given ID

  private Logger logger = Logger.getLogger(AttributeQueryRunner.class.getName());
  private Query query = null;
  private QueryCompiler csql;
  private Attribute[] attributes = null;
  private Filter[] filters = null;
  private FormatSpec format = null;
  private PrintStream osr;
}