/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck.testcase.generic; import java.sql.Connection; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.DatabaseType; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Species; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.Priority; import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase; import org.ensembl.healthcheck.util.DBUtils; /** * Checks that mapped stable IDs are correct and mapping tables are correctly * populated * * <p> * Group is <b>check_stable_ids </b> * </p> * * <p> * To be run after the stable ids have been assigned. * </p> */ public class StableIDMapping extends SingleDatabaseTestCase { /** * Create a new instance of StableID. */ public StableIDMapping() { setDescription("Checks stable_id mapping data is valid."); setPriority(Priority.RED); setEffect("Compara will have invalid stable IDs."); setFix("Re-run stable ID mapping or fix manually."); setTeamResponsible(Team.CORE); setSecondTeamResponsible(Team.GENEBUILD); } public void types() { removeAppliesToType(DatabaseType.ESTGENE); removeAppliesToType(DatabaseType.CDNA); removeAppliesToType(DatabaseType.OTHERFEATURES); removeAppliesToType(DatabaseType.RNASEQ); } /** * Run the test. * * @param dbre * The database to use. * @return true if the test passed. * */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); // there are several species where ID mapping is not done Species s = dbre.getSpecies(); result &= checkPrefixes(dbre); if (s != null && s != Species.CAENORHABDITIS_ELEGANS && s != Species.DROSOPHILA_MELANOGASTER && s != Species.SACCHAROMYCES_CEREVISIAE && s != Species.ANOPHELES_GAMBIAE && s != Species.UNKNOWN) { if (dbre.getType() == DatabaseType.CORE) {// for sangervega, do not // check the prefixes result &= checkStableIDEventTypes(con); result = checkStableIDTimestamps(con); } } return result; } // ----------------------------------------------------------- /** * Check that all stable IDs in the table have the correct prefix. The * prefix is defined in Species.java */ private boolean checkPrefixes(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); Map<String, String> tableToLetter = new HashMap<String, String>(); tableToLetter.put("gene", "G"); tableToLetter.put("transcript", "T"); tableToLetter.put("translation", "P"); tableToLetter.put("exon", "E"); Iterator<String> it = tableToLetter.keySet().iterator(); while (it.hasNext()) { String type = (String) it.next(); String table = type; String prefix = Species.getStableIDPrefixForSpecies( dbre.getSpecies(), dbre.getType()); if (prefix == null || prefix == "") { ReportManager.problem(this, con, "Can't get stable ID prefix for " + dbre.getSpecies().toString() + " - please add to Species.java"); result = false; } else { if (prefix.equalsIgnoreCase("IGNORE")) { return true; } String prefixLetter = prefix + (String) tableToLetter.get(type); int wrong = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM " + table + " WHERE stable_id NOT LIKE '" + prefixLetter + "%' AND stable_id NOT LIKE 'LRG%'"); if (wrong > 0) { ReportManager.problem(this, con, wrong + " rows in " + table + " do not have the correct (" + prefixLetter + ") prefix"); result = false; } } } return result; } // ----------------------------------------------------------- /** * Check for any stable ID events where the 'type' column does not match the * identifier type. * */ private boolean checkStableIDEventTypes(Connection con) { boolean result = true; String[] types = { "gene", "transcript", "translation", "exon" }; for (int i = 0; i < types.length; i++) { String type = types[i]; String prefix = getPrefixForType(con, type); String sql = "SELECT COUNT(*) FROM stable_id_event WHERE (old_stable_id LIKE '" + prefix + "%' OR new_stable_id LIKE '" + prefix + "%') AND type != '" + type + "'"; int rows = DBUtils.getRowCount(con, sql); if (rows > 0) { ReportManager .problem( this, con, rows + " rows of type " + type + " (prefix " + prefix + ") in stable_id_event have identifiers that do not correspond to " + type + "s"); result = false; } // check for invalid or missing stable ID versions int nInvalidVersions = DBUtils.getRowCount(con, "SELECT COUNT(*) AS " + type + "_with_invalid_version" + " FROM " + type + " WHERE version < 1 OR version IS NULL;"); if (nInvalidVersions > 0) { ReportManager.problem(this, con, "Invalid versions in " + type); DBUtils.printRows(this, con, "SELECT DISTINCT(version) FROM " + type); result = false; } // make sure stable ID versions in the typeName table matches those // in stable_id_event // for the latest mapping_session String mappingSessionId = DBUtils.getRowColumnValue(con, "SELECT mapping_session_id FROM mapping_session " + "ORDER BY created DESC LIMIT 1"); if (mappingSessionId.equals("")) { ReportManager.info(this, con, "No mapping_session found"); return result; } int nVersionMismatch = DBUtils .getRowCount( con, "SELECT COUNT(*) FROM stable_id_event sie, " + type + " si WHERE sie.mapping_session_id = " + Integer.parseInt(mappingSessionId) + " AND sie.new_stable_id = si.stable_id AND sie.new_version <> si.version"); if (nVersionMismatch > 0) { ReportManager.problem(this, con, "Version mismatch between " + nVersionMismatch + " " + type + " versions in and stable_id_event"); DBUtils.printRows( this, con, "SELECT si.stable_id FROM stable_id_event sie, " + type + " si WHERE sie.mapping_session_id = " + Integer.parseInt(mappingSessionId) + " AND sie.new_stable_id = si.stable_id AND sie.new_version <> si.version"); result = false; } } return result; } // ----------------------------------------------------------- private String getPrefixForType(Connection con, String type) { String prefix = ""; // hope the first row of the type table is correct String stableID = DBUtils.getRowColumnValue(con, "SELECT stable_id FROM " + type + " LIMIT 1"); prefix = stableID.replaceAll("[0-9]", ""); if (prefix.equals("")) { System.err.println("Error, can't get prefix for " + type + " from stable ID " + stableID); } return prefix; } // ----------------------------------------------------------- /** * */ private boolean checkStableIDTimestamps(Connection con) { boolean result = true; String[] types = { "gene", "transcript", "translation", "exon" }; for (int i = 0; i < types.length; i++) { String table = types[i]; String sql = "SELECT COUNT(*) FROM " + table + " WHERE created_date=0 OR modified_date=0"; int rows = DBUtils.getRowCount(con, sql); if (rows > 0) { ReportManager .problem( this, con, rows + " rows in " + table + " have created or modified dates of 0000-00-00 00:00:00"); result = false; } } return result; } }