/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck.testcase.funcgen; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.DatabaseType; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Species; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.Priority; import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase; import org.ensembl.healthcheck.util.DBUtils; /** * Checks the *_stable_id tables to ensure they are populated, have no orphan * references, and have valid versions. Also prints some examples from the table * for checking by eye. * * <p> * Group is <b>check_stable_ids </b> * </p> * * <p> * To be run after the stable ids have been assigned. * </p> */ public class FuncgenStableID extends SingleDatabaseTestCase { /** * Create a new instance of StableID. */ public FuncgenStableID() { addToGroup("post_regulatorybuild"); addToGroup("funcgen");//do we need this group and the funcgen-release group? addToGroup("funcgen-release"); //setHintLongRunning(true);// ?Only take about 10 mins for mouse setTeamResponsible(Team.FUNCGEN); setDescription("Checks regulatory_feature stable_id fields are valid."); setPriority(Priority.RED); setEffect("RegulatoryFeatures will not have valid stable IDs."); setFix("Re-run stable ID mapping or fix manually."); } /** * This only applies to core and Vega databases. */ public void types() { removeAppliesToType(DatabaseType.OTHERFEATURES); removeAppliesToType(DatabaseType.CDNA); removeAppliesToType(DatabaseType.CORE); removeAppliesToType(DatabaseType.VARIATION); removeAppliesToType(DatabaseType.COMPARA); //add COMPARA here? } /** * Run the test. * * @param dbre * The database to use. * @return true if the test passed. * */ public boolean run(DatabaseRegistryEntry dbre) { //boolean result = true; Connection con = dbre.getConnection(); //result &= checkStableIDs(con, "regulatory_feature"); //No need for any of this prefix stuff as we only store ints //Keep just in case of future 'imports' // there are several species where ID mapping is not done //Species s = dbre.getSpecies(); //if (s != Species.CAENORHABDITIS_ELEGANS && s != Species.DROSOPHILA_MELANOGASTER && // s != Species.SACCHAROMYCES_CEREVISIAE && s != Species.ANOPHELES_GAMBIAE) { // result &= checkPrefixes(dbre); // result &= checkStableIDEventTypes(con); //} //result = checkStableIDTimestamps(con); //return result; //Just merge this with checkStableIDs? return checkStableIDs(con, "regulatory_feature"); } /** * Checks that the typeName_stable_id table is valid. The table is valid if it * has >0 rows, and there are no orphan references between typeName table and * typeName_stable_id. Also prints some example data from the * typeName_stable_id table via ReportManager.info(). * * @param con * connection to run queries on. * @param typeName * name of the type to check, e.g. "exon" * @return true if the table and references are valid, otherwise false. */ public boolean checkStableIDs(Connection con, String typeName) { boolean result = true; String stableIDtable = typeName; // + "_stable_id"; //WARNING THis is not truly generic as we always check for regulatory type in feature_set //Need to change this if we ever want to check other stable IDs //Need to do this for each DISPLAYABLE regulatory feature_set //String[] fsetIDs = getColumnValues(conn, "SELECT feature_set_id from feature_set where type='regulatory'"); //This would be better with a ResultSet as we want the nametoo try { Statement stmt = con.createStatement(); String sql = "SELECT fs.feature_set_id, fs.name from feature_set fs, status s, status_name sn " + "where fs.type='regulatory' and fs.feature_set_id=s.table_id and s.table_name='feature_set' " + "and s.status_name_id=sn.status_name_id and sn.name='DISPLAYABLE'"; //System.out.println("Executing " + sql); ResultSet fsetIdNames = stmt.executeQuery(sql); while (fsetIdNames != null && fsetIdNames.next()) { sql = "SELECT count(stable_id) from regulatory_feature where " + "stable_id is not NULL and feature_set_id=" + fsetIdNames.getString(1); //System.out.println("Executing " + sql); int nStableIDs = DBUtils.getRowCount(con, sql); //We could really do with a HC to check we have RegFeats on all main Chrs if (nStableIDs < 1) { ReportManager.problem(this, con, stableIDtable + " contains no valid stable_ids for FeatureSet:\t" + fsetIdNames.getString(2)); result = false; } else{ nStableIDs = DBUtils.getRowCount(con, "SELECT count(stable_id) from regulatory_feature where stable_id " + "is NULL and feature_set_id=" + fsetIdNames.getString(1)); if (nStableIDs > 0) { ReportManager.problem(this, con, stableIDtable + " contains " + nStableIDs + " NULL stable_ids for FeatureSet:\t" + fsetIdNames.getString(2)); result = false; } else{ ReportManager.correct(this, con, "No NULL stable_ids for FeatureSet " + fsetIdNames.getString(2)); } } //Test for duplicates within set //Could remove this if there is a unique feature_set_id, stble_id key? int duplicates = DBUtils.getRowCount(con, "SELECT COUNT(stable_id)-COUNT(DISTINCT stable_id) FROM " + stableIDtable + " WHERE feature_set_id=" + fsetIdNames.getString(1)); if (duplicates > 0) { ReportManager.problem(this, con, stableIDtable + " has " + duplicates + " duplicate stable IDs for FeatureSet:\t" + fsetIdNames.getString(2)); result = false; } else { ReportManager.correct(this, con, "No duplicate stable IDs for FeatureSet:\t" + fsetIdNames.getString(2)); } // check for invalid or missing stable ID versions //int nInvalidVersions = DBUtils.getRowCount(con, "SELECT COUNT(*) AS " + typeName + "_with_invalid_version" + " FROM " + stableIDtable // + " WHERE version < 1 OR version IS NULL;"); // //if (nInvalidVersions > 0) { // ReportManager.problem(this, con, "Invalid " + typeName + " versions in " + stableIDtable); // DBUtils.printRows(this, con, "SELECT DISTINCT(version) FROM " + stableIDtable); // result = false; //} // make sure stable ID versions in the typeName_stable_id table matches those in stable_id_event // for the latest mapping_session //String mappingSessionId = DBUtils.getRowColumnValue(con, "SELECT mapping_session_id FROM mapping_session " + //"ORDER BY created DESC LIMIT 1"); //if (mappingSessionId.equals("")) { // ReportManager.info(this, con, "No mapping_session found"); // return result; //} //int nVersionMismatch = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM stable_id_event sie, " + stableIDtable + // " si WHERE sie.mapping_session_id = " + Integer.parseInt(mappingSessionId) + // " AND sie.new_stable_id = si.stable_id AND sie.new_version <> si.version"); //if (nVersionMismatch > 0) { // ReportManager.problem(this, con, "Version mismatch between " + nVersionMismatch + " " + typeName + " versions in " + // stableIDtable + " and stable_id_event"); // DBUtils.printRows(this, con, "SELECT si.stable_id FROM stable_id_event sie, " + stableIDtable + // " si WHERE sie.mapping_session_id = " + Integer.parseInt(mappingSessionId) + // " AND sie.new_stable_id = si.stable_id AND sie.new_version <> si.version"); // result = false; } }catch (SQLException e){ e.printStackTrace(); } return result; } // ----------------------------------------------------------- /** * Check that all stable IDs in the table have the correct prefix. The prefix * is defined in Species.java */ private boolean checkPrefixes(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); Map tableToLetter = new HashMap(); tableToLetter.put("gene", "G"); tableToLetter.put("transcript", "T"); tableToLetter.put("translation", "P"); tableToLetter.put("exon", "E"); Iterator it = tableToLetter.keySet().iterator(); while (it.hasNext()) { String type = (String) it.next(); String table = type + "_stable_id"; String prefix = Species.getStableIDPrefixForSpecies(dbre.getSpecies(), dbre.getType()); if (prefix == null || prefix == "") { ReportManager.problem(this, con, "Can't get stable ID prefix for " + dbre.getSpecies().toString() + " - please add to Species.java"); result = false; } else { if (prefix.equalsIgnoreCase("IGNORE")) { return true; } String prefixLetter = prefix + (String) tableToLetter.get(type); int wrong = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM " + table + " WHERE stable_id NOT LIKE '" + prefixLetter + "%'"); if (wrong > 0) { ReportManager.problem(this, con, wrong + " rows in " + table + " do not have the correct (" + prefixLetter + ") prefix"); result = false; } else { ReportManager.correct(this, con, "All rows in " + table + " have the correct prefix (" + prefixLetter + ")"); } } } return result; } // ----------------------------------------------------------- /** * Check for any stable ID events where the 'type' column does not match the * identifier type. * */ // private boolean checkStableIDEventTypes(Connection con) { // // boolean result = true; // // String[] types = { "gene", "transcript", "translation" }; // // for (int i = 0; i < types.length; i++) { // // String type = types[i]; // // String prefix = getPrefixForType(con, type); // // String sql = "SELECT COUNT(*) FROM stable_id_event WHERE (old_stable_id LIKE '" + prefix + "%' OR new_stable_id LIKE '" // + prefix + "%') AND type != '" + type + "'"; // // int rows = DBUtils.getRowCount(con, sql); // // if (rows > 0) { // // ReportManager.problem(this, con, rows + " rows of type " + type + " (prefix " + prefix // + ") in stable_id_event have identifiers that do not correspond to " + type + "s"); // result = false; // // } else { // // ReportManager.correct(this, con, "All types in stable_id_event correspond to identifiers"); // // } // } // return result; // // } // // // ----------------------------------------------------------- // // private String getPrefixForType(Connection con, String type) { // // String prefix = ""; // // // hope the first row of the _type_stable_id table is correct // String stableID = DBUtils.getRowColumnValue(con, "SELECT stable_id FROM " + type + "_stable_id LIMIT 1"); // // prefix = stableID.replaceAll("[0-9]", ""); // // if (prefix.equals("")) { // System.err.println("Error, can't get prefix for " + type + " from stable ID " + stableID); // } // // return prefix; // // } ////----------------------------------------------------------- // /** // // * // */ // private boolean checkStableIDTimestamps(Connection con) { // // boolean result = true; // // String[] types = { "gene", "transcript", "translation" }; // // for (int i = 0; i < types.length; i++) { // // String table = types[i] + "_stable_id"; // // String sql = "SELECT COUNT(*) FROM " + table + " WHERE created_date=0 OR modified_date=0"; // // int rows = DBUtils.getRowCount(con, sql); // // if (rows > 0) { // // ReportManager.problem(this, con, rows + " rows in " + table + " have created or modified dates of 0000-00-00 00:00:00"); // result = false; // // } else { // // ReportManager.correct(this, con, "All entries in " + table + " have valid created/modified timestamps"); // // } // } // return result; // // } }