/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.generic;
import java.sql.Connection;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
import org.ensembl.healthcheck.util.SqlTemplate;
/**
 * Healthcheck for the assembly_exception table.
 *
 * <p>
 * Runs four independent checks against a single database: start/end coordinate sanity, presence of
 * 'alt_seq_mapping' alignments for every non-PAR exception, consistency of the alignment hit name with
 * the exception's reference region, and the external database the alignments are attributed to. Every
 * check is always executed so that all problems are reported in one run.
 */
public class AssemblyExceptions extends SingleDatabaseTestCase {

    /**
     * Check the assembly_exception table.
     */
    public AssemblyExceptions() {
        setDescription("Check assembly_exception table");
        setTeamResponsible(Team.GENEBUILD);
    }

    /**
     * Remove the database types this test is not applied to.
     */
    public void types() {
        removeAppliesToType(DatabaseType.ESTGENE);
        removeAppliesToType(DatabaseType.CDNA);
        removeAppliesToType(DatabaseType.VEGA);
        removeAppliesToType(DatabaseType.SANGER_VEGA);
        removeAppliesToType(DatabaseType.OTHERFEATURES);
        removeAppliesToType(DatabaseType.RNASEQ);
    }

    /**
     * Check the data in the assembly_exception table. Note referential integrity checks are done in CoreForeignKeys.
     *
     * @param dbre
     *          The database to use.
     * @return true only if every individual check passed.
     */
    public boolean run(DatabaseRegistryEntry dbre) {
        boolean result = true;
        // Non-short-circuiting &= on purpose: every check must run and report, even after a failure.
        result &= checkStartEnd(dbre);
        result &= seqMapping(dbre);
        result &= uniqueRegion(dbre);
        result &= checkExternalDB(dbre);
        return result;
    }

    /**
     * Report one problem per offending name.
     *
     * @param con
     *          Connection the problems are reported against.
     * @param names
     *          Names of the offending regions; an empty collection means the check passed.
     * @param messageFormat
     *          {@link String#format(String, Object...)} template taking the name as its single argument.
     * @return true if {@code names} is empty, i.e. nothing was reported.
     */
    private boolean reportProblems(Connection con, Collection<String> names, String messageFormat) {
        for (String name : names) {
            ReportManager.problem(this, con, String.format(messageFormat, name));
        }
        return names.isEmpty();
    }

    /**
     * Check that 'alt_seq_mapping' alignments on non-PAR assembly exceptions are only attributed to the
     * 'GRC_primary_assembly' external database.
     *
     * @param dbre
     *          The database to use.
     * @return true if no assembly exception has such an alignment from another external database.
     */
    private boolean checkExternalDB(DatabaseRegistryEntry dbre) {
        SqlTemplate t = DBUtils.getSqlTemplate(dbre);
        Connection con = dbre.getConnection();
        String sql = "SELECT distinct sr.name FROM seq_region sr, assembly_exception ax, external_db e, "
                + " dna_align_feature daf, analysis a WHERE a.analysis_id = daf.analysis_id AND "
                + " daf.seq_region_id = sr.seq_region_id AND ax.seq_region_id = sr.seq_region_id AND "
                + " e.external_db_id = daf.external_db_id AND logic_name = 'alt_seq_mapping' AND "
                + " exc_type not in ('PAR') AND e.db_name != 'GRC_primary_assembly'";
        List<String> offendingRegions = t.queryForDefaultObjectList(sql, String.class);
        return reportProblems(con, offendingRegions,
                "Assembly exception %s has a mapping which is not from 'GRC_primary_assembly'");
    }

    /**
     * Check that the hit name of every 'alt_seq_mapping' alignment on a non-PAR assembly exception equals
     * the name of the seq_region the exception refers to (exc_seq_region_id).
     *
     * @param dbre
     *          The database to use.
     * @return true if no assembly exception has an alignment whose hit name differs from its reference region.
     */
    private boolean uniqueRegion(DatabaseRegistryEntry dbre) {
        SqlTemplate t = DBUtils.getSqlTemplate(dbre);
        Connection con = dbre.getConnection();
        String sql = "SELECT distinct sr.name FROM seq_region sr, assembly_exception ax, seq_region sr2, "
                + " dna_align_feature daf, analysis a WHERE a.analysis_id = daf.analysis_id AND "
                + " daf.seq_region_id = sr.seq_region_id AND ax.seq_region_id = sr.seq_region_id AND "
                + " ax.exc_seq_region_id = sr2.seq_region_id AND logic_name = 'alt_seq_mapping' AND "
                + " exc_type not in ('PAR') AND sr2.name != hit_name";
        List<String> offendingRegions = t.queryForDefaultObjectList(sql, String.class);
        return reportProblems(con, offendingRegions, "Assembly exception %s maps more than one reference region");
    }

    /**
     * Check that every non-PAR assembly exception has at least one dna_align_feature row produced by the
     * 'alt_seq_mapping' analysis.
     *
     * @param dbre
     *          The database to use.
     * @return true if no assembly exception is missing its alignments.
     */
    private boolean seqMapping(DatabaseRegistryEntry dbre) {
        SqlTemplate t = DBUtils.getSqlTemplate(dbre);
        Connection con = dbre.getConnection();

        String allSql = "SELECT distinct sr.name FROM seq_region sr, assembly_exception ax where ax.seq_region_id = sr.seq_region_id and exc_type not in ('PAR')";
        List<String> allExceptions = t.queryForDefaultObjectList(allSql, String.class);

        String alignedSql = "SELECT distinct sr.name FROM seq_region sr, assembly_exception ax, dna_align_feature daf, analysis a "
                + " WHERE sr.seq_region_id = ax.seq_region_id AND exc_type not in ('PAR') AND sr.seq_region_id = daf.seq_region_id "
                + " AND daf.analysis_id = a.analysis_id AND a.logic_name = 'alt_seq_mapping'";
        List<String> alignedExceptions = t.queryForDefaultObjectList(alignedSql, String.class);

        // Set difference: exceptions that exist but have no alt_seq_mapping alignment.
        Set<String> missing = new HashSet<String>(allExceptions);
        missing.removeAll(alignedExceptions);

        return reportProblems(con, missing,
                "Assembly exception '%s' does not have results in dna_align_feature table for analysis alt_seq_mapping");
    }

    /**
     * Check that start/end coordinates in assembly_exception are ordered correctly and that a database
     * containing 'HAP' exceptions also has at least one seq_region_attrib row with attrib_type code 'non_ref'.
     *
     * @param dbre
     *          The database to use.
     * @return true if all three checks passed.
     */
    private boolean checkStartEnd(DatabaseRegistryEntry dbre) {
        Connection con = dbre.getConnection();
        boolean result = true;

        // seq_region_start must not be greater than seq_region_end
        int rows = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM assembly_exception WHERE seq_region_start > seq_region_end");
        if (rows > 0) {
            result = false;
            ReportManager.problem(this, con, "assembly_exception has " + rows + " rows where seq_region_start > seq_region_end");
        }

        // exc_seq_region_start must not be greater than exc_seq_region_end
        rows = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM assembly_exception WHERE exc_seq_region_start > exc_seq_region_end");
        if (rows > 0) {
            result = false;
            ReportManager.problem(this, con, "assembly_exception has " + rows + " rows where exc_seq_region_start > exc_seq_region_end");
        }

        // If the assembly_exception table contains an exception of type 'HAP' then there should be at
        // least one seq_region_attrib row whose attrib_type code is 'non_ref'. The second query is only
        // issued when the first one finds HAP rows (short-circuit &&).
        if (DBUtils.getRowCount(con, "SELECT COUNT(*) FROM assembly_exception WHERE exc_type='HAP'") > 0
                && DBUtils.getRowCount(con, "SELECT COUNT(*) FROM seq_region_attrib sra, attrib_type at WHERE sra.attrib_type_id=at.attrib_type_id AND at.code='non_ref'") == 0) {
            result = false;
            ReportManager.problem(this, con, "assembly_exception contains at least one exception of type 'HAP' but there are no seq_region_attrib rows of type 'non-reference'");
        }

        if (result) {
            ReportManager.correct(this, con, "assembly_exception start/end co-ordinates make sense");
        }
        return result;
    }

} // AssemblyExceptions