/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck.testcase.generic; import java.sql.Connection; import java.sql.ResultSet; import java.sql.Statement; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.DatabaseType; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Species; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase; /** * Check that the start and end of genes and transcripts make sense. */ public class GeneTranscriptStartEnd extends SingleDatabaseTestCase { /** * Create a new GeneTranscriptStartEnd test case. */ public GeneTranscriptStartEnd() { setDescription("Checks that gene start/end agrees with transcript table"); setTeamResponsible(Team.GENEBUILD); } /** * This only applies to core and Vega databases. */ public void types() { removeAppliesToType(DatabaseType.OTHERFEATURES); removeAppliesToType(DatabaseType.RNASEQ); } /** * Run the test. * * @param dbre * The database to use. * @return true if the test passed. * */ public boolean run(DatabaseRegistryEntry dbre) { boolean startEndResult = true; boolean strandResult = true; // check that the lowest transcript start of a gene's transcripts is the // same as the gene's // start // and that the highest transcript end of a gene's transcripts is the same // as the gene's // end // the SQL below will return any where this is /not/ the case String sql = "SELECT g.gene_id, g.stable_id, g.seq_region_start AS gene_start, g.seq_region_end AS gene_end, MIN(tr.seq_region_start) AS min_transcript_start, MAX(tr.seq_region_end) AS max_transcript_end FROM gene g, transcript tr WHERE tr.gene_id=g.gene_id GROUP BY tr.gene_id HAVING (gene_start <> min_transcript_start OR gene_end <> max_transcript_end)"; Connection con = dbre.getConnection(); try { Statement stmt = con.createStatement(); ResultSet rs = stmt.executeQuery(sql); rs.beforeFirst(); // gene GC32491 in drosophila is allowed to have all sorts of things wrong // with it if (rs != null && !rs.isAfterLast() && rs.next() && dbre.getSpecies() != Species.DROSOPHILA_MELANOGASTER && rs.getString("stable_id") != null && !rs.getString("stable_id").equalsIgnoreCase("CG32491")) { ReportManager.problem(this, con, "Gene ID " + rs.getLong(1) + " has start/end that does not agree with transcript start/end"); startEndResult = false; while (rs.next()) { ReportManager.problem(this, con, "Gene ID " + rs.getLong(1) + " has start/end that does not agree with transcript start/end"); startEndResult = false; } rs.close(); if (startEndResult) { ReportManager.correct(this, con, "All gene/transcript start/end agree"); } // also check that all gene's transcripts have the same strand as the // gene sql = "SELECT g.gene_id FROM gene g, transcript tr WHERE tr.gene_id=g.gene_id AND tr.seq_region_strand != g.seq_region_strand"; rs = stmt.executeQuery(sql); while (rs.next()) { ReportManager.problem(this, con, "Gene ID " + rs.getLong(1) + " has strand that does not agree with transcript strand"); strandResult = false; } rs.close(); stmt.close(); if (strandResult) { ReportManager.correct(this, con, "All gene/transcript strands agree"); } } // if drosophila gene } catch (Exception e) { e.printStackTrace(); } return startEndResult && strandResult; } }