/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.compara;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
/**
* An EnsEMBL Healthcheck test case that checks that all the genome_dbs for a
* method_link_species_set are present in the genomic_aligns
*/
public class CheckGenomicAlignGenomeDBs extends SingleDatabaseTestCase {
/**
* Create an CheckGenomicAlignGenomeDBs that applies to a specific set of
* databases.
*/
public CheckGenomicAlignGenomeDBs() {
setDescription("Check the genome_dbs for a method_link_species_set are present in the genomic_aligns");
setTeamResponsible(Team.COMPARA);
}
/**
* Run the test.
*
* @param dbre
* The database to use.
* @return true if the test passed.
*
*/
public boolean run(DatabaseRegistryEntry dbre) {
boolean result = true;
Connection con = dbre.getConnection();
/**
* Check have entries in the genomic_align table
*/
if (!tableHasRows(con, "genomic_align")) {
ReportManager.problem(this, con,
"No entries in the genomic_align table");
return result;
}
if (!tableHasRows(con, "genomic_align_block")) {
ReportManager.problem(this, con,
"No entries in the genomic_align_block table");
return result;
}
if (!tableHasRows(con, "method_link_species_set")) {
ReportManager.problem(this, con,
"No entries in the method_link_species_set table");
return result;
}
/**
* Get all method_link_species_set_ids for genomic_align_blocks
*/
String[] method_link_species_set_ids = DBUtils
.getColumnValues(con,
"SELECT distinct(method_link_species_set_id) FROM genomic_align_block");
if (method_link_species_set_ids.length > 0) {
for (String mlss_id : method_link_species_set_ids) {
/**
* Expected number of genome_db_ids
*/
String gdb_sql = "SELECT COUNT(*) FROM species_set LEFT JOIN method_link_species_set USING (species_set_id) WHERE method_link_species_set_id = " + mlss_id;
String[] num_genome_db_ids = DBUtils.getColumnValues(con, gdb_sql);
/**
* Find genome_db_ids in genomic_aligns. For speed, first only
* look at the first 100 genomic_align_blocks. If the test fails,
* test all genomic_align_blocks.
* Expect the number of distinct genome_db_ids to be the same as
* the number of genome_db_ids in the species set except when I
* have an ancestor when the number from the genomic_aligns will
* be one larger. Don't specifically test for this, just check
* if it's equal to or larger - more worried if it's smaller ie
* missed some expected genome_db_ids.
*/
String part1_sql = "SELECT COUNT(DISTINCT genome_db_id) FROM (SELECT * FROM genomic_align_block WHERE method_link_species_set_id = " + mlss_id + " ";
String part2_sql = " ) t1 LEFT JOIN genomic_align USING (genomic_align_block_id) LEFT JOIN dnafrag USING (dnafrag_id) HAVING COUNT(DISTINCT genome_db_id) >= (SELECT COUNT(*) FROM species_set LEFT JOIN method_link_species_set USING (species_set_id) WHERE method_link_species_set_id = "
+ mlss_id + " )";
String slow_sql = part1_sql + part2_sql;
String fast_sql = part1_sql + "LIMIT 100" + part2_sql;
String[] success = DBUtils.getColumnValues(con, fast_sql);
boolean all_found = (success.length > 0);
if (!all_found) {
success = DBUtils.getColumnValues(con, slow_sql);
all_found = (success.length > 0);
}
if (all_found) {
ReportManager.correct(this, con, "All genome_dbs are present in the genomic_aligns for method_link_species_set_id " + mlss_id);
} else {
ReportManager.problem(
this,
con,
"Not all the genome_dbs are present in alignment with method_link_species_set_id "
+ mlss_id);
ReportManager.problem(this, con, "USEFUL SQL: " + slow_sql);
result = false;
}
}
}
return result;
}
} // CheckGenomicAlignGenomeDBs