/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck.testcase.generic; import java.sql.Connection; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.DatabaseType; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase; import org.ensembl.healthcheck.util.DBUtils; /** * Check for any genes/exons that are suspiciously large; > 2Mb for genes, > 0.5Mb for exons. Length assumed to be end-start+1, i.e. * including introns. */ public class BigGeneExon extends SingleDatabaseTestCase { private static long GENE_WARN = 1000000; // warn if length greater than this private static long GENE_ERROR = 3000000; // throw if length greater than this private static long GENE_ENORMOUS = 15000000; // really complain if length greater than this private static long EXON_ERROR = 500000; // warn if length greater than this /** * Create a new BigGeneExon testcase. */ public BigGeneExon() { setDescription("Check for suspiciously long genes & exons"); setTeamResponsible(Team.GENEBUILD); } /** * This only really applies to core databases */ public void types() { removeAppliesToType(DatabaseType.OTHERFEATURES); removeAppliesToType(DatabaseType.ESTGENE); removeAppliesToType(DatabaseType.VEGA); removeAppliesToType(DatabaseType.RNASEQ); } /** * Run the test. * * @param dbre * The database to use. * @return true if the test passed. * */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); // gene - warning String sql = "SELECT COUNT(*) FROM gene WHERE (seq_region_end-seq_region_start+1) >= " + GENE_WARN + " AND (seq_region_end-seq_region_start+1) < " + GENE_ERROR; if (dbre.getType() == DatabaseType.SANGER_VEGA) {// for sangervega ignore genes that do not have source havana or WU sql += " and (source='havana' or source='WU')"; } int rows = DBUtils.getRowCount(con, sql); if (rows > 0) { ReportManager.info(this, con, rows + " genes are longer than " + GENE_WARN + " bases but less than " + GENE_ERROR + " bases"); } else { ReportManager.correct(this, con, "No genes longer than " + GENE_WARN + " bases but less than " + GENE_ERROR + " bases"); } // gene - error sql = "SELECT gene_id FROM gene WHERE (seq_region_end-seq_region_start+1) >= " + GENE_ERROR; if (dbre.getType() == DatabaseType.SANGER_VEGA) {// for sangervega ignore genes that do not have source havana or WU sql += " and (source='havana' or source='WU')"; } String[] longIDs = DBUtils.getColumnValues(con, sql); if (longIDs.length > 0) { String s = longIDs.length > 1 ? "s are " : " is "; ReportManager.problem(this, con, longIDs.length + " gene" + s + "longer than " + GENE_ERROR + " bases"); printLongGeneDetails(con, longIDs); result = false; } else { ReportManager.correct(this, con, "No genes longer than " + GENE_ERROR + " bases"); } // gene - really long sql = "SELECT gene_id FROM gene WHERE (seq_region_end-seq_region_start+1) >= " + GENE_ENORMOUS; if (dbre.getType() == DatabaseType.SANGER_VEGA) {// for sangervega ignore genes that do not have source havana or WU sql += " and (source='havana' or source='WU')"; } longIDs = DBUtils.getColumnValues(con, sql); if (longIDs.length > 0) { String s = longIDs.length > 1 ? "s are " : " is "; ReportManager.problem(this, con, longIDs.length + " gene" + s + "longer than " + GENE_ENORMOUS + " bases - this can't be right!"); printLongGeneDetails(con, longIDs); result = false; } else { ReportManager.correct(this, con, "No genes longer than " + GENE_ENORMOUS + " bases"); } // exon - error sql = "SELECT COUNT(*) FROM exon WHERE (seq_region_end-seq_region_start+1) >= " + EXON_ERROR; rows = DBUtils.getRowCount(con, sql); if (rows > 0) { ReportManager.problem(this, con, rows + " exons are longer than " + EXON_ERROR + " bases"); result = false; } else { ReportManager.correct(this, con, "No exons longer than " + EXON_ERROR + " bases"); } return result; } // run // ------------------------------------------------------------------------------------ private void printLongGeneDetails(Connection con, String[] longIDs) { for (int i = 0; i < longIDs.length; i++) { String id = longIDs[i]; // can't do one single query as not all genes may have // display_xrefs/descriptions String length = DBUtils.getRowColumnValue(con, "SELECT (seq_region_end-seq_region_start+1) AS length FROM gene WHERE gene_id=" + id); String stableID = DBUtils.getRowColumnValue(con, "SELECT stable_id FROM gene WHERE gene_id=" + id); String name = DBUtils.getRowColumnValue(con, "SELECT x.display_label FROM gene g, xref x WHERE x.xref_id=g.display_xref_id AND g.gene_id=" + id); String description = DBUtils.getRowColumnValue(con, "SELECT description FROM gene WHERE gene_id=" + id); String str = "Gene " + stableID; if (name != null && name.length() > 0) { str += " (" + name + ")"; } str += " has length " + length; if (description != null && description.length() > 0) { str += " (" + description + ")"; } ReportManager.problem(this, con, str); } } // ------------------------------------------------------------------------------------ } // BigGeneExon