/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.eg_compara;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.testcase.AbstractTemplatedTestCase;
import org.ensembl.healthcheck.testcase.EnsTestCase;
import org.ensembl.healthcheck.util.SqlTemplate;
import org.ensembl.healthcheck.util.SqlTemplate.ResultSetCallback;
abstract public class AbstractControlledRows extends AbstractTemplatedTestCase {
protected Connection masterDbConn;
protected SqlTemplate masterSqlTemplate;
protected void init() {
DatabaseRegistryEntry masterDbRe = getComparaMasterDatabase();
masterDbConn = masterDbRe.getConnection();
masterSqlTemplate = getSqlTemplate(masterDbConn);
}
protected boolean checkRangeOfRowsInTable(
final String controlledTableToTest,
final String masterTable,
DatabaseRegistryEntry testDbre,
DatabaseRegistryEntry masterDbRe,
int limit,
int offset
) {
return checkRangeOfRowsInTable(
controlledTableToTest,
masterTable,
testDbre,
masterDbRe,
"",
limit,
offset
);
}
/**
* For every row of the table controlledTableToTest in the database
* testDbre this checks, if this row also exists in the table
* masterTable of masterDbRe.
*
*/
protected boolean checkRangeOfRowsInTable(
final String controlledTableToTest,
final String masterTable,
DatabaseRegistryEntry testDbre,
DatabaseRegistryEntry masterDbRe,
String whereClause,
int limit,
int offset
) {
final Connection testDbConn = testDbre.getConnection();
final Connection masterconn = masterDbRe.getConnection();
final SqlTemplate sqlTemplateTestDb = getSqlTemplate(testDbConn);
final SqlTemplate sqlTemplateComparaMaster = getSqlTemplate(masterconn);
String fetchAllRowsFromTableSql = generateFetchAllRowsFromTableSql(testDbConn, controlledTableToTest, whereClause, limit, offset);
final EnsTestCase thisTest = this;
final List<String> testTableColumns = getColumnsOfTable(testDbConn, controlledTableToTest);
boolean result = sqlTemplateTestDb.execute(
fetchAllRowsFromTableSql,
new ResultSetCallback<Boolean>() {
@Override public Boolean process(ResultSet rs) throws SQLException {
rs.setFetchSize(batchSize);
boolean allRowsPresentInMasterDb = true;
while (rs.next() && !numReportedRowsExceedsMaximum()) {
boolean currentRowPresentInMasterDb = isCurrentRowInMaster(
rs,
sqlTemplateComparaMaster,
masterTable,
testTableColumns
);
allRowsPresentInMasterDb &= currentRowPresentInMasterDb;
if (!currentRowPresentInMasterDb) {
numReportedRows++;
if (numReportedRowsExceedsMaximum()) {
ReportManager.problem(
thisTest,
testDbConn,
"The maximum of " + getMaxReportedMismatches() + " reported rows has been reached, no further rows will be tested."
);
} else {
ReportManager.problem(
thisTest,
testDbConn,
"Row not found in master: " + resultSetRowAsString(rs)
);
}
}
}
return allRowsPresentInMasterDb;
}
},
// No bound parameters
//
new Object[0]
);
return result;
}
/**
*
* Will check, if the current for of the ResultSet is present in the master database.
*
* The columns are passed in each time so this doesn't have to be generated for each
* call.
*
* @param masterTableName
* @param sqlTemplateComparaMaster
* @param columns
* @param rsFromTestDb
* @throws SQLException
*/
protected boolean isCurrentRowInMaster(
final ResultSet rsFromTestDb,
final SqlTemplate sqlTemplateComparaMaster,
final String masterTableName,
final List<String> columns
) throws SQLException {
int numColumns = rsFromTestDb.getMetaData().getColumnCount();
List<Object> columnValuesObjects = new ArrayList<Object>(numColumns);
for(int currentColIndex=0; currentColIndex<numColumns; currentColIndex++) {
Object value = rsFromTestDb.getObject(currentColIndex+1);
columnValuesObjects.add(currentColIndex, value);
}
String countMatchingRowsSql = "select count(*) from " + masterTableName + " where " + asParameterisedWhereClause(columns, columnValuesObjects);
final EnsTestCase thisTest = this;
boolean isInMasterDb = sqlTemplateComparaMaster.execute(
countMatchingRowsSql,
new ResultSetCallback<Boolean>() {
@Override public Boolean process(ResultSet rsFromMaster) throws SQLException {
int numColumns = rsFromMaster.getMetaData().getColumnCount();
if (numColumns!=1) {
throw new RuntimeException(
"Expected one column, but got " + numColumns +
" instead!" + resultSetRowAsString(rsFromMaster)
);
}
rsFromMaster.next();
int numberOfMatchingRowsInMaster = rsFromMaster.getInt(1);
if (numberOfMatchingRowsInMaster==1) {
return true;
}
if (numberOfMatchingRowsInMaster==0) {
return false;
}
ReportManager.problem(thisTest, rsFromMaster.getStatement().getConnection(),
"Found " + numberOfMatchingRowsInMaster + " "
+ "matching rows in the master database!\n"
+ "The row searched for was:\n"
+ resultSetRowAsString(rsFromTestDb)
);
// We return true, because there is a row in the master
// database. The tested database has passed for this row,
// it is the master database that has the problem.
//
return true;
}
},
columnValuesObjects.toArray()
);
return isInMasterDb;
}
/**
*
* Creates a where clause for a sql statement of the form column_1=? and
* column_2=? ... column_n=?. The listOfValues parameter is used to
* determine whether a value will be compared with "=" or with "is". By
* default "=" is used, but "is" will be used for null values like
* "... and column_i is null".
*
* @param listOfColumns
* @param listOfValues
* @return where clause
*/
protected String asParameterisedWhereClause(List<String> listOfColumns, List<Object> listOfValues) {
int numColumns = listOfColumns.size();
int numValues = listOfValues.size();
if (numColumns != numValues) {
throw new IllegalArgumentException(
"listOfColumns ("+listOfColumns.size()+") does not have the "
+"same size as listOfValues ("+listOfValues.size()+")!"
);
}
StringBuffer whereClause = new StringBuffer();
for(int i=0; i<numColumns; i++) {
// Join the individual conditions with "and", but don't start the
// where clause with an "and".
//
String joiner;
if (i==0) {
joiner="";
} else {
joiner=" and ";
}
// Tests for null values have to be done with "is" and not with
// "=?". The latter would always evaluate to false.
//
if (listOfValues.get(i) == null) {
whereClause.append(joiner + listOfColumns.get(i) + " is ?");
} else {
whereClause.append(joiner + listOfColumns.get(i) + "=?");
}
}
return whereClause.toString();
}
/**
*
* For the given ResultSet object this will return a stringified version
* of the current row. Useful to print in error or debug messages.
*
* @param rs
* @return row as string
* @throws SQLException
*/
protected String resultSetRowAsString(ResultSet rs)
throws SQLException {
int numColumns = rs.getMetaData().getColumnCount();
List<String> columnValuesStringy = new ArrayList<String>(numColumns);
for(int currentColIndex=0; currentColIndex<numColumns; currentColIndex++) {
Object value = rs.getObject(currentColIndex+1);
String label = rs.getMetaData().getColumnName(currentColIndex+1);
String convertedValue = label + "=";
if (value==null) {
convertedValue += "<null>";
} else {
convertedValue += value.toString();
}
columnValuesStringy.add(currentColIndex, convertedValue);
}
return asCommaSeparatedString(columnValuesStringy);
}
/**
* Joins the list of strings into one comma (and space) separated string.
*
* @param listOfStrings
* @return list as string
*/
protected String asCommaSeparatedString(List<String> listOfStrings) {
return joinListOfStrings(listOfStrings, ", ");
}
/**
*
* Joins a list of strings with a separator.
*
* @param listOfStrings
* @param separator
* @return list as string
*/
protected String joinListOfStrings(List<String> listOfStrings, String separator) {
int numStrings = listOfStrings.size();
StringBuffer commaSeparated = new StringBuffer();
commaSeparated.append(listOfStrings.get(0));
for(int i=1; i<numStrings; i++) {
commaSeparated.append(separator + listOfStrings.get(i));
}
return commaSeparated.toString();
}
/**
*
* Generates a sql statement that will fetch all columns of all rows from
* the given table.
*
* @param conn
* @param tableName
* @return SQL statement
*/
protected String generateFetchAllRowsFromTableSql(
Connection conn,
String tableName,
int limit,
int offset
) {
return generateFetchAllRowsFromTableSql(conn, tableName, "", limit, offset);
}
protected String generateFetchAllRowsFromTableSql(
Connection conn,
String tableName,
String whereClause,
int limit,
int offset
) {
List<String> columns = getColumnsOfTable(conn, tableName);
String sql = fetchAllRowsFromTableSql(conn, tableName, columns, whereClause, limit, offset);
return sql;
}
/**
*
* Generates a sql statement that will fetch the given columns of all rows
* of the table.
*
* @param conn
* @param tableName
* @param columns
* @return SQL statement
*/
protected String fetchAllRowsFromTableSql(
Connection conn,
String tableName,
List<String> columns,
int limit,
int offset
) {
return fetchAllRowsFromTableSql(conn, tableName, columns, limit, offset);
}
protected String fetchAllRowsFromTableSql(
Connection conn,
String tableName,
List<String> columns,
String whereClause,
int limit,
int offset
) {
return "select " + asCommaSeparatedString(columns) + " from " + tableName + " " + whereClause + " limit " + limit + " offset " + offset;
}
/**
* Maximum number of rows to be fetched in one iteration;
*/
protected final int batchSize = 1000;
protected boolean numReportedRowsExceedsMaximum() {
return numReportedRows>getMaxReportedMismatches();
}
/**
* If the ComparisonStrategy RowByRow
* is used, we want to make sure not to report excessive an amount of
* errors.
*
* The maximum amount of mismatches that this test is allowed to report
* when using ComparisonStrategy.RowByRow.
*
*/
protected int getMaxReportedMismatches() {
return 50;
}
/**
* Number of rows that have been reported by this test, if
* ComparisonStrategy RowByRow is being used. If numReportedRows
* exceeds getMaxReportedMismatches(), the test will terminate.
*
*/
protected int numReportedRows;
protected boolean columnsAreSubset(
final Connection testDbConn,
final Connection masterconn,
final String controlledTableToTest
) {
final List<String> testTableColumns = getColumnsOfTable(testDbConn, controlledTableToTest);
final List<String> masterColumns = getColumnsOfTable(masterconn, controlledTableToTest);
boolean masterHasAllNecessaryColumns = masterColumns.containsAll(testTableColumns);
return masterHasAllNecessaryColumns;
}
/**
*
* Returns the names of all columns for a given table.
*
* @param conn
* @param table
* @return names of columns
*/
protected List<String> getColumnsOfTable(Connection conn, String table) {
List<String> columnsOfTable;
try {
DatabaseMetaData md = conn.getMetaData();
columnsOfTable = new ArrayList<String>();
ResultSet rs = md.getColumns(null, null, table, null);
while (rs.next()) {
columnsOfTable.add(rs.getString(4));
}
} catch (SQLException e) {
throw new RuntimeException(e);
}
if (columnsOfTable.size()==0) {
throw new RuntimeException("Got no columns for table " + table);
}
return columnsOfTable;
}
/**
* <p>
* List of method names in the compara master that mean DNA compara is
* run.
* </p>
* <p>
* If you modify this list, please make sure that all method names are
* quoted.
* </p>
*/
protected final List<String> dnaComparaMethods =
Arrays.asList(
new String[] {
"'GenomicAlignBlock.pairwise_alignment'",
"'GenomicAlignBlock.multiple_alignment'",
"'GenomicAlignTree.tree_alignment'",
"'GenomicAlignBlock.constrained_element'"
}
);
/**
* <p>
* Given the production name of a species, checks, if it is linked to a
* compara method that involves dna comparisons.
* </p>
*
* @param speciesName
* @return true if linked to DNA compara
*/
protected boolean speciesConfiguredForDnaCompara(String speciesName) {
String dnaComparaMethodsCommaSep = StringUtils.join(dnaComparaMethods, ", ");
// We use "distinct", but if a species is configured for more than one
// dna compara method, this will still return more than one row.
//
String sql = "select distinct genome_db.genome_db_id, genome_db.name, method_link.class "
+ "from "
+ " genome_db "
+ " join species_set using (genome_db_id) "
+ " join method_link_species_set using (species_set_id) "
+ " join method_link using (method_link_id) "
+ "where "
+ " genome_db.name='" + speciesName + "' "
+ " and method_link.class in ( "
+ dnaComparaMethodsCommaSep
+ " ) ";
List<Integer> dnaMethodsConfigured = masterSqlTemplate.queryForDefaultObjectList(
sql,
Integer.class
);
if (dnaMethodsConfigured.size()>=1) {
return true;
}
if (dnaMethodsConfigured.size()==0) {
return false;
}
throw new RuntimeException(
"Unexpected number of rows returned!\n"
+ "The sql used was:\n\n"
+ sql
);
}
}