/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck.testcase.eg_core; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.HashMap; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.DatabaseType; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase; import org.ensembl.healthcheck.util.DBUtils; /** * Check for identically-named seq_regions in different co-ordinate systems. Also check that identically-named seq_regions have the * same length. */ public class SeqRegionCoordSystem extends SingleDatabaseTestCase { /** * Create a new SeqRegionCoordSystem testcase. */ public SeqRegionCoordSystem() { addToGroup("compara-ancestral"); setDescription("Check for identically-named seq_regions in different co-ordinate systems. Also check that identically-named seq_regions have the same length."); setTeamResponsible(Team.GENEBUILD); } /** * Run the test. * * @param dbre * The database to use. * @return true if the test passed. * */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; if (dbre.isMultiSpecies()) { logger.finest("Skipping " + getShortTestName() + " healthcheck for multi-species database " + dbre.getName()); return true; } if (dbre.getType() == DatabaseType.CORE) { result &= checkNames(dbre); } result &= checkLengths(dbre); return result; } // run private boolean checkNames(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); // EG add loop to support multispecies databases for (int speciesId : dbre.getSpeciesIds()) { HashMap coordSystems = new HashMap(); // build hash of co-ord system IDs to names & versions try { Statement stmt = con.createStatement(); ResultSet rs = stmt.executeQuery("SELECT coord_system_id, name, version FROM coord_system where attrib like '%default_version%' and species_id=" + speciesId); while (rs.next()) { coordSystems.put(new Long(rs.getLong("coord_system_id")), rs.getString("name") + ":" + rs.getString("version")); } } catch (SQLException se) { se.printStackTrace(); } // check each pair in turn Long[] coordSystemIDs = (Long[]) coordSystems.keySet().toArray(new Long[coordSystems.size()]); for (int i = 0; i < coordSystemIDs.length; i++) { for (int j = i + 1; j < coordSystemIDs.length; j++) { String csI = (String) coordSystems.get(coordSystemIDs[i]); String csJ = (String) coordSystems.get(coordSystemIDs[j]); int same = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM seq_region s1, seq_region s2 WHERE s1.coord_system_id=" + coordSystemIDs[i] + " AND s2.coord_system_id=" + coordSystemIDs[j] + " AND s1.name = s2.name"); if (same > 0) { ReportManager.problem(this, con, "Co-ordinate systems " + csI + " and " + csJ + " have " + same + " identically-named seq_regions - this may cause problems for ID mapping"); result = false; } else { ReportManager.correct(this, con, "Co-ordinate systems " + csI + " and " + csJ + " have no identically-named seq_regions"); } } // j } // i } return result; } private boolean checkLengths(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); // EG add special code for dealing with multispecies databases for (int speciesId : dbre.getSpeciesIds()) { String query = "SELECT COUNT(*) FROM seq_region s1, seq_region s2, coord_system c1, coord_system c2 " + "WHERE s1.name=s2.name AND s1.coord_system_id != s2.coord_system_id " + "AND c1.coord_system_id=s1.coord_system_id AND c2.coord_system_id=s2.coord_system_id " + "AND s1.length != s2.length and c1.species_id=" + speciesId + " and c2.species_id=" + speciesId + " AND c1.attrib like '%default_version%' AND c2.attrib like '%default_version%'"; // for vega, only report if they are on the same assembly if (dbre.getType() == DatabaseType.SANGER_VEGA || dbre.getType() == DatabaseType.VEGA) { query += " and c1.version=c2.version"; } int rows = DBUtils.getRowCount(con, query); if (rows > 0) { ReportManager.problem(this, con, rows + " seq_regions have the same name but different lengths for species " + speciesId); result = false; } else { ReportManager.correct(this, con, "All seq_region lengths match for species " + speciesId); } } return result; } } // SeqRegionCoordsystem