/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.generic;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Species;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
/**
* Check that: - marker features exist if markers exist - that map_wieghts are
* set to non-zero values - all marker priorities are > 50 - each chromosome has
* some marker features - each chromosome has some marker_map_locations
*
* Currently only checks for human, mouse, rat and zebrafish.
*/
public class MarkerFeatures extends SingleDatabaseTestCase {
// marker priority to warn if under
private static final int MARKER_PRIORITY_THRESHOLD = 50;
// max number of top-level seq regions to check
private static final int MAX_TOP_LEVEL = 100;
/**
* Creates a new instance of MarkerFeatures.
*/
public MarkerFeatures() {
setDescription("Checks that marker_features exist and that they have non-zero map_weights, that marker priorities are sensible and that all chromosomes have some marker features and marker_map_locations");
setTeamResponsible(Team.GENEBUILD);
}
/**
* This test only applies to core databases.
*/
public void types() {
removeAppliesToType(DatabaseType.OTHERFEATURES);
removeAppliesToType(DatabaseType.ESTGENE);
removeAppliesToType(DatabaseType.VEGA);
removeAppliesToType(DatabaseType.CDNA);
removeAppliesToType(DatabaseType.RNASEQ);
}
/**
* Test various things about marker features.
*
* @param dbre
* The database to use.
* @return Result.
*/
public boolean run(DatabaseRegistryEntry dbre) {
boolean result = true;
Connection con = dbre.getConnection();
// only check for human, mouse, rat and zebrafish
Species s = dbre.getSpecies();
if ((dbre.getType() != DatabaseType.SANGER_VEGA && (s
.equals(Species.HOMO_SAPIENS) || s.equals(Species.MUS_MUSCULUS) || s
.equals(Species.RATTUS_NORVEGICUS)))
|| s.equals(Species.DANIO_RERIO)) { // for
// sangervega
// only
// run
// the
// test
// for
// zebrafish
result &= checkFeaturesAndMapWeights(con);
result &= checkAllChromosomesHaveMarkers(con);
}
return result;
} // run
// ----------------------------------------------------------------------
/*
* Verify marker features exist if markers exist, and that map weights are
* non-zero.
*/
private boolean checkFeaturesAndMapWeights(Connection con) {
boolean result = true;
int rowCount = DBUtils.getRowCount(con,
"SELECT COUNT(*) FROM marker_feature");
if (rowCount == 0) {
ReportManager
.problem(this, con,
"No marker features in database even though markers are present");
result = false;
}
int badWeightCount = DBUtils
.getRowCount(
con,
"SELECT marker_id, COUNT(*) AS correct, map_weight FROM marker_feature GROUP BY marker_id HAVING map_weight != correct");
if (badWeightCount > 0) {
ReportManager
.problem(
this,
con,
badWeightCount
+ " marker features have not been assigned correct map weights");
result = false;
}
if (result) {
ReportManager.correct(this, con, "Marker features appear to be ok");
}
return result;
} // checkFeaturesAndMapWeights
// ----------------------------------------------------------------------
/**
* Check that all chromomes have > 0 markers_map_locations and
* marker_features.
*/
private boolean checkAllChromosomesHaveMarkers(Connection con) {
boolean result = true;
// find all the chromosomes, and for each one check that it has some
// markers
// note a "chromosome" is assumed to be a seq_region that is:
// - on the top-level co-ordinate system and
// - doesn't have and _ or . in the name and
// - has a seq_region name of less than 3 characters
// - doesn't have a name starting with "Un" or "MT"
// get top level co-ordinate system ID
String sql = "SELECT coord_system_id FROM coord_system WHERE rank=1 LIMIT 1";
String s = DBUtils.getRowColumnValue(con, sql);
if (s.length() == 0) {
System.err
.println("Error: can't get top-level co-ordinate system for "
+ DBUtils.getShortDatabaseName(con));
return false;
}
int topLevelCSID = Integer.parseInt(s);
try {
// check each top-level seq_region (up to a limit) to see how many
// marker_map_locations and marker features there are
Statement stmt = con.createStatement();
ResultSet rs = stmt
.executeQuery("SELECT * FROM seq_region WHERE coord_system_id="
+ topLevelCSID
+ " AND name NOT LIKE '%\\_%' AND name NOT LIKE '%.%' AND name NOT LIKE 'Un%' AND name NOT LIKE 'MT%' AND LENGTH(name) < 3 ORDER BY name");
int numTopLevel = 0;
while (rs.next() && numTopLevel++ < MAX_TOP_LEVEL) {
long seqRegionID = rs.getLong("seq_region_id");
String seqRegionName = rs.getString("name");
// check marker_map_locations
logger.fine("Counting marker_map_locations on chromosome "
+ seqRegionName);
sql = "SELECT COUNT(*) FROM marker_map_location WHERE chromosome_name='"
+ seqRegionName + "'";
int rows = DBUtils.getRowCount(con, sql);
if (rows == 0) {
ReportManager.problem(this, con, "Chromosome "
+ seqRegionName + " (seq_region_id " + seqRegionID
+ ") has no entries in marker_map_location");
result = false;
} else {
ReportManager.correct(this, con, "Chromosome "
+ seqRegionName + " has " + rows
+ " marker_map_locations");
}
// check marker_features
logger.fine("Counting marker_features on chromosome "
+ seqRegionName);
sql = "SELECT COUNT(*) FROM marker_feature WHERE seq_region_id="
+ seqRegionID;
rows = DBUtils.getRowCount(con, sql);
if (rows == 0) {
ReportManager.problem(this, con, "Chromosome "
+ seqRegionName + " (seq_region_id " + seqRegionID
+ ") has no marker_features");
result = false;
} else {
ReportManager.correct(this, con, "Chromosome "
+ seqRegionName + " has " + rows
+ " marker_features");
}
}
rs.close();
stmt.close();
if (numTopLevel == MAX_TOP_LEVEL) {
logger.warning("Only checked first " + numTopLevel
+ " seq_regions");
}
} catch (SQLException se) {
se.printStackTrace();
}
return result;
}
// ----------------------------------------------------------------------
} // MarkerFeatures