/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck.testcase.generic; import java.sql.Connection; import java.util.Set; import java.util.Map; import java.util.logging.Logger; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase; import org.ensembl.healthcheck.util.DBUtils; import org.ensembl.healthcheck.util.SqlTemplate; import org.ensembl.healthcheck.util.Utils; import org.ensembl.healthcheck.util.RowMapper; import org.ensembl.healthcheck.util.DefaultMapRowMapper; import org.ensembl.healthcheck.util.DefaultObjectRowMapper; /** * Check that feature co-ords make sense. */ public class FeatureCoords extends SingleDatabaseTestCase { /** * Creates a new instance of CheckFeatureCoordsTestCase */ public FeatureCoords() { setDescription("Check that feature co-ords make sense."); setHintLongRunning(true); setTeamResponsible(Team.GENEBUILD); } public Map<String,Integer> seq_regions; /** * Iterate over each affected database and perform various checks. * * @param dbre * The database to check. * @return True if the test passed. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; String[] featureTables = getCoreFeatureTables(); Connection con = dbre.getConnection(); SqlTemplate t = DBUtils.getSqlTemplate(con); String sql = "SELECT s.seq_region_id,s.length FROM seq_region s join seq_region_attrib a USING (seq_region_id) WHERE a.attrib_type_id = 6"; DefaultMapRowMapper<String, Integer> mapper = new DefaultMapRowMapper<String, Integer>(String.class, Integer.class); seq_regions = t.queryForMap(sql,mapper); for (int tableIndex = 0; tableIndex < featureTables.length; tableIndex++) { String tableName = featureTables[tableIndex]; result &= checkStart(dbre, tableName); result &= checkStartEnd(dbre, tableName); result &= checkLength(dbre, tableName); } // foreach table return result; } // run protected boolean checkLength(DatabaseRegistryEntry dbre, String tableName) { SqlTemplate t = DBUtils.getSqlTemplate(dbre); boolean result = true; if (tableName.equals("repeat_feature")) { return true; } String featureSQL = "SELECT seq_region_id, max(seq_region_start) from " + tableName + " group by seq_region_id "; DefaultMapRowMapper<Integer, Integer> feature_mapper = new DefaultMapRowMapper<Integer, Integer>(Integer.class, Integer.class); Map<Integer, Integer> featureResults = t.queryForMap(featureSQL, feature_mapper); for (Map.Entry<Integer, Integer> entry : featureResults.entrySet()) { Integer max = entry.getValue(); Integer region = entry.getKey(); if (seq_regions.containsKey(region) ) { Integer length = seq_regions.get(region); if (max > length) { ReportManager.problem(this, dbre.getConnection(), "Some features in " + tableName + " start on position " + max + " when region " + region + " is only " + length + " long"); result = false; } } } return result; } protected boolean checkStart(DatabaseRegistryEntry dbre, String tableName) { String sql = "SELECT COUNT(*) FROM " + tableName + " WHERE seq_region_start < 1"; int rows = DBUtils.getRowCount(dbre.getConnection(), sql); if (rows > 0) { ReportManager.problem(this, dbre.getConnection(), rows + " rows in " + tableName + " have seq_region_start < 1"); return false; } else { return true; } } /** * Subroutine to carry out a check on whether the start is after the end. * This is to allow EG to skip this check for circular molecules * * @param dbre * @param tableName * @return true if start is after end */ protected boolean checkStartEnd(DatabaseRegistryEntry dbre, String tableName) { String sql = "SELECT COUNT(*) FROM " + tableName + " WHERE seq_region_start > seq_region_end"; int rows = DBUtils.getRowCount(dbre.getConnection(), sql);; if (rows > 0) { ReportManager.problem(this, dbre.getConnection(), rows + " rows in " + tableName + " have seq_region_start > seq_region_end"); return false; } else { return true; } } } // FeatureCoords