/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.generic;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Species;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
/**
* Check various things about ID mapping-related tables.
*/
public class MappingSession extends SingleDatabaseTestCase {
// historical names to ignore when doing format checking
private String[] ignoredNames = { "homo_sapiens_core_120" };
/**
* Create a new MappingSession healthcheck.
*/
public MappingSession() {
setDescription("Checks the mapping session and stable ID tables.");
setTeamResponsible(Team.CORE);
setSecondTeamResponsible(Team.GENEBUILD);
}
/**
* This only really applies to core databases
*/
public void types() {
removeAppliesToType(DatabaseType.OTHERFEATURES);
removeAppliesToType(DatabaseType.ESTGENE);
removeAppliesToType(DatabaseType.VEGA);
removeAppliesToType(DatabaseType.SANGER_VEGA);
removeAppliesToType(DatabaseType.CDNA);
removeAppliesToType(DatabaseType.RNASEQ);
}
/**
* Run the test - check the ID mapping-related tables.
*
* @param dbre
* The database to check.
* @return true if the test passes.
*/
public boolean run(final DatabaseRegistryEntry dbre) {
boolean result = true;
// there are several species where ID mapping is not done
Species s = dbre.getSpecies();
if (s != Species.CAENORHABDITIS_ELEGANS && s != Species.DROSOPHILA_MELANOGASTER && s != Species.SACCHAROMYCES_CEREVISIAE) {
Connection con = dbre.getConnection();
logger.fine("Checking tables exist and are populated");
result &= checkTablesExistAndPopulated(dbre);
logger.fine("Checking DB name format in mapping_session");
result &= checkDBNameFormat(con);
// logger.fine("Checking mapping_session chaining");
// result &= checkMappingSessionChaining(con);
logger.fine("Checking mapping_session old_release and new_release values");
result &= checkOldAndNewReleases(con);
logger.fine("Checking for duplicates in stable_id_event");
result &= checkStableIdEventDuplicates(con);
}
return result;
} // run
// -----------------------------------------------------------------
/**
* Check format of old/new DB names in mapping_session.
*/
private boolean checkDBNameFormat(final Connection con) {
boolean result = true;
String dbNameRegexp = "[A-Za-z]+_[A-Za-z]+_(core|est|estgene|vega)_\\d+_\\d+[A-Za-z]?.*";
String[] sql = {
"SELECT old_db_name from mapping_session WHERE old_db_name <> 'ALL'",
"SELECT new_db_name from mapping_session WHERE new_db_name <> 'LATEST'" };
for (int i = 0; i < sql.length; i++) {
String[] names = DBUtils.getColumnValues(con, sql[i]);
for (int j = 0; j < names.length; j++) {
if (!(names[j].matches(dbNameRegexp)) && !ignoreName(names[j])) {
ReportManager
.problem(
this,
con,
"Database name "
+ names[j]
+ " in mapping_session does not appear to be in the correct format");
result = false;
}
}
}
if (result) {
ReportManager
.correct(this, con,
"All database names in mapping_session appear to be in the correct format");
}
return result;
}
// -----------------------------------------------------------------
/**
* Checks tables exist and have >0 rows. Doesn't check population for
* first-build databases.
*
* @param con
* @return True when all ID mapping-related tables exist and have > 0 rows.
*
*/
private boolean checkTablesExistAndPopulated(
final DatabaseRegistryEntry dbre) {
String[] tables = new String[] { "stable_id_event", "mapping_session",
"gene_archive", "peptide_archive" };
boolean result = true;
Connection con = dbre.getConnection();
for (int i = 0; i < tables.length; i++) {
String table = tables[i];
boolean exists = DBUtils.checkTableExists(con, table);
if (exists) {
// gene_archive and peptide_archive can be empty
if (table.equals("gene_archive")
|| table.equals("peptide_archive")) {
continue;
}
if (DBUtils.countRowsInTable(con, table) == 0) {
ReportManager
.problem(this, con, "Empty table:" + table);
result = false;
}
} else {
ReportManager.problem(this, con, "Missing table:" + table);
result = false;
}
}
return result;
}
// -----------------------------------------------------------------
/**
* Check that all mapping_sessions have new releases that are greater than
* the old releases.
*/
private boolean checkOldAndNewReleases(final Connection con) {
boolean result = true;
try {
Statement stmt = con.createStatement();
// nasty forced cast by adding 0 required since the columns are
// VARCHARS and need to be compared lexicographically
ResultSet rs = stmt
.executeQuery("SELECT mapping_session_id, old_db_name, new_db_name, old_release, new_release FROM mapping_session WHERE old_release+0 >= new_release+0");
while (rs.next()) {
// ignore homo_sapiens_core_18_34 -> homo_sapiens_core_18_34a
// since this was when we didn't change numbers between releases
if (rs.getString("old_db_name").equals(
"homo_sapiens_core_18_34")) {
continue;
}
ReportManager
.problem(
this,
con,
"Mapping session with ID "
+ rs.getLong("mapping_session_id")
+ " ("
+ rs.getString("old_db_name")
+ " -> "
+ rs.getString("new_db_name")
+ ") has a new_release ("
+ rs.getInt("new_release")
+ ") that is not greater than the old release ("
+ rs.getInt("old_release")
+ "). May cause problems with IDHistoryView.");
result = false;
}
} catch (SQLException se) {
se.printStackTrace();
}
if (result) {
ReportManager.correct(this, con,
"All new_release values are greater than old_release.");
}
return result;
}
// -----------------------------------------------------------------
/**
* Check for duplicates in the stable_id_event table
*/
private boolean checkStableIdEventDuplicates(final Connection con) {
boolean result = true;
String sql = "SELECT mapping_session_id, COUNT(*) FROM stable_id_event "
+ "GROUP BY old_stable_id, old_version, new_stable_id, new_version, mapping_session_id, type, score "
+ "HAVING COUNT(*) > 1";
String[] rows = DBUtils.getColumnValues(con, sql);
if (rows.length > 0) {
ReportManager.problem(this, con, rows.length
+ " duplicates in stable_id_event");
result = false;
} else {
ReportManager
.correct(this, con, "No duplicates in stable_id_event");
}
return result;
}
// -----------------------------------------------------------------
/**
* Certain historical names don't match the new format and should be ignored
* to prevent constant failures.
*/
private boolean ignoreName(String name) {
for (int i = 0; i < ignoredNames.length; i++) {
if (name.equals(ignoredNames[i])) {
return true;
}
}
return false;
}
// -----------------------------------------------------------------
} // MappingSession