/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck.testcase.generic; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.HashMap; import java.util.Map; import org.ensembl.healthcheck.DatabaseRegistry; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.DatabaseType; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase; import org.ensembl.healthcheck.util.DBUtils; import org.ensembl.healthcheck.util.DefaultMapRowMapper; import org.ensembl.healthcheck.util.MapRowMapper; /** * Check that features exist for expected analysis types, and that all analysis * types have features. */ public class FeatureAnalysis extends SingleDatabaseTestCase { // the following tables have an analysis_id column but are NOT really feature tables String[] featureTables = getCoreTablesWithAnalysisID(); private String[] proteinFeatureAnalyses = { "prints", "pfscan", "signalp", "seg", "ncoils", "pfam", "tmhmm" }; /** * Creates a new instance of FeatureAnalysis */ public FeatureAnalysis() { setHintLongRunning(true); setDescription("Check that features exist for the expected analyses."); setTeamResponsible(Team.GENEBUILD); } /** * This only applies to core and Vega databases. */ public void types() { removeAppliesToType(DatabaseType.VEGA); removeAppliesToType(DatabaseType.CDNA); } /** * Run the test. * * @param dbre * The database to use. * @return true if the test passed. * */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; // -------------------------------- // First check that all analyses have some features result &= checkAnalysesHaveFeatures(dbre, featureTables); // -------------------------------- // Check protein_feature result &= checkFeatureAnalyses(dbre, "protein_feature", proteinFeatureAnalyses); // ------------------------------- // TODO - other feature tables // ------------------------------- return result; } // run // --------------------------------------------------------------------- /** * Check that all the analyses in the analysis table have some features * associated. */ private boolean checkAnalysesHaveFeatures(DatabaseRegistryEntry dbre, String[] featureTables) { boolean result = true; Connection con = dbre.getConnection(); try { Map<Integer, String> analysesFromFeatureTables = new HashMap<Integer, String>(); Map<Integer, String> analysesFromAnalysisTable = getLogicNamesFromAnalysisTable(con); // build cumulative list of analyses from feature tables for (String featureTable: featureTables) { logger.fine("Collecting analysis IDs from " + featureTable); String sql = String.format("SELECT DISTINCT(analysis_id), COUNT(*) AS count FROM %s GROUP BY analysis_id", featureTable); MapRowMapper<Integer, Integer> mapper = new DefaultMapRowMapper<Integer, Integer>(Integer.class, Integer.class); Map<Integer, Integer> tableAnalysis = getSqlTemplate(dbre).queryForMap(sql, mapper); for(Map.Entry<Integer, Integer> entry: tableAnalysis.entrySet()) { Integer id = entry.getKey(); Integer count = entry.getValue(); String priorTable = analysesFromFeatureTables.get(id); if(priorTable != null) { //Gene & transcript are allowed to share analysis id if("transcript".equals(featureTable) && "gene".equals(priorTable)) { result = true; } //Unmapped object, object_xref & marker_feature are allowed to share else if ("unmapped_object".equals(featureTable) && ("object_xref".equals(priorTable) || "marker_feature".equals(priorTable))) { result = true; } //Operon & operon transcript are allowed to share else if ("operon".equals(featureTable) && "operon_transcript".equals(priorTable)) { result = true; } else { String msg = String.format("Analysis with ID %d is used in %s as well as %s", id, featureTable, priorTable); ReportManager.problem(this, con, msg); } } else { analysesFromFeatureTables.put(id, featureTable); } // check that each analysis actually exists in the analysis // table if(! analysesFromAnalysisTable.containsKey(id) && ! "object_xref".equals(featureTable)) { String msg = String.format("Analysis ID %d is used in %d rows in %s but is not present in the analysis table", id, count, featureTable); ReportManager.problem(this, con, msg); } } } // look at each analysis ID *from the analysis table* to see if it's // used // somewhere Statement stmt = con.createStatement(); ResultSet rs = stmt .executeQuery("SELECT analysis_id, logic_name FROM analysis"); while (rs.next()) { int analysisID = rs.getInt("analysis_id"); String logicName = rs.getString("logic_name"); if (!analysesFromFeatureTables.containsKey(analysisID)) { ReportManager.problem(this, con, "Analysis with ID " + analysisID + ", logic name " + logicName + " is not used in any feature table"); result = false; } } rs.close(); stmt.close(); } catch (SQLException se) { se.printStackTrace(); } return result; } // ----------------------------------------------------------------- private boolean checkFeatureAnalyses(DatabaseRegistryEntry dbre, String table, String[] analyses) { boolean result = true; logger.fine("Checking analyses for " + table + " in " + dbre.getName()); int tableCount = DBUtils.countRowsInTable(dbre.getConnection(), table); if(tableCount == 0) { logger.fine("Table "+table+" has no rows. Skipping"); return true; } for (int i = 0; i < analyses.length; i++) { String sql = "SELECT COUNT(*) FROM " + table + ", analysis a WHERE LCASE(a.logic_name)='" + analyses[i] + "' AND a.analysis_id=" + table + ".analysis_id"; Connection con = dbre.getConnection(); int rows = DBUtils.getRowCount(con, sql); if (rows == 0) { ReportManager.problem(this, con, table + " has no features for analysis " + analyses[i]); result = false; } else { ReportManager.correct(this, con, table + " has features for analysis " + analyses[i]); } } return result; } // ----------------------------------------------------------------- } // FeatureAnalysis