/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.generic;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
/**
* Check that the assembly table and seq_region table are consistent.
*/
public class AssemblySeqregion extends SingleDatabaseTestCase {
/**
* Create a new AssemlySeqregion test case.
*/
public AssemblySeqregion() {
setDescription("Check that the chromosome lengths from the seq_region table agree with both the assembly table and the karyotype table.");
setTeamResponsible(Team.GENEBUILD);
}
/**
* Data is only tested in core database, as the tables are in sync
*/
public void types() {
removeAppliesToType(DatabaseType.OTHERFEATURES);
removeAppliesToType(DatabaseType.ESTGENE);
removeAppliesToType(DatabaseType.RNASEQ);
removeAppliesToType(DatabaseType.CDNA);
}
/**
* @param dbre
* The database to use.
* @return The test case result.
*/
public boolean run(DatabaseRegistryEntry dbre) {
boolean result = true;
Connection con = dbre.getConnection();
// ---------------------------------------------------
// Find any seq_regions that have different lengths in seq_region &
// assembly
// NB seq_region length should always be equal to (or possibly greater
// than) the maximum
// assembly length
// The SQL returns failures
// ----------------------------------------------------
String sql = "SELECT sr.name AS name, sr.length, cs.name AS coord_system "
+ "FROM seq_region sr, assembly ass, coord_system cs "
+ "WHERE sr.coord_system_id=cs.coord_system_id "
+ "AND ass.asm_seq_region_id = sr.seq_region_id "
+ "GROUP BY ass.asm_seq_region_id "
+ "HAVING sr.length < MAX(ass.asm_end)";
try {
Statement stmt = con.createStatement();
ResultSet rs = stmt.executeQuery(sql);
int i = 0;
while (rs.next() && i++ < 50) {
result = false;
String cs = rs.getString("coord_system");
String sr = rs.getString("name");
ReportManager.problem(this, con, cs + " " + sr
+ " is shorter in seq_region than in assembly");
}
if (i == 0) {
ReportManager
.correct(
this,
con,
"Sequence region lengths are equal or greater in the seq_region table compared to the assembly table");
}
} catch (SQLException e) {
System.err.println("Error executing " + sql + ":");
e.printStackTrace();
}
int cs = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM coord_system");
if (cs > 1) {
int rows = DBUtils
.getRowCount(con, "SELECT COUNT(*) FROM assembly");
if (rows == 0) {
ReportManager.problem(this, con, rows
+ " rows found in assembly table");
} else {
ReportManager.correct(this, con, "Assembly table is populated");
// -------------------------------------------------------
// check various other things about the assembly table
// Check for mismatched lengths of assembled and component
// sides.
// ie where (asm_end - asm_start + 1) != (cmp_end - cmp_start +
// 1)
rows = DBUtils
.getRowCount(
con,
"SELECT COUNT(*) FROM assembly WHERE (asm_end - asm_start + 1) != (cmp_end - cmp_start + 1)");
if (rows > 0) {
ReportManager
.problem(
this,
con,
rows
+ " rows in assembly table have mismatched lengths of assembled and component sides");
} else {
ReportManager
.correct(
this,
con,
"All rows in assembly table have matching lengths of assembled and component sides");
}
// check for start/end < 1
rows = DBUtils
.getRowCount(
con,
"SELECT COUNT(*) FROM assembly WHERE asm_start < 1 OR asm_end < 1 OR cmp_start < 1 OR cmp_end < 1");
if (rows > 0) {
ReportManager
.problem(
this,
con,
rows
+ " rows in assembly table have start or end coords < 1");
} else {
ReportManager
.correct(this, con,
"All rows in assembly table have start and end coords > 0");
}
// check for end < start
rows = DBUtils
.getRowCount(
con,
"SELECT COUNT(*) FROM assembly WHERE asm_end < asm_start OR cmp_end < cmp_start");
if (rows > 0) {
ReportManager
.problem(
this,
con,
rows
+ " rows in assembly table have start or end coords < 1");
} else {
ReportManager
.correct(this, con,
"All rows in assembly table have end coords > start coords");
}
}
}
return result;
} // run
} // ChromosomeLengths