/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck.testcase.generic; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.DatabaseType; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase; import org.ensembl.healthcheck.util.DBUtils; /** * Check for biotype mismatch between the genes and transcripts */ public class GeneTranscriptBiotypeMismatch extends SingleDatabaseTestCase { /** * Create a new GeneTranscriptBiotypeMismatch testcase. */ public GeneTranscriptBiotypeMismatch() { setDescription("Check for biotype mismatch between the genes and transcripts"); setTeamResponsible(Team.GENEBUILD); } /** * This only really applies to core databases */ public void types() { removeAppliesToType(DatabaseType.OTHERFEATURES); removeAppliesToType(DatabaseType.CDNA); removeAppliesToType(DatabaseType.RNASEQ); } /** * Run the test. * * @param dbre * The database to use. * @return true if the test passed. * */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); //Joining the gene table with the transcript table //Get the biotypes and compare the list //Transcript biotype is limited to '%coding', which captures 'protein_coding' and 'unknown_likely_coding' //Gene biotype is limited to the list of non-coding biotypes (\'lincRNA\',\'snoRNA\',\'antisense\',\'snRNA\',\'Mt_tRNA\',\'processed_transcript\',\'macro_lncRNA\')"; String sql = "SELECT g.gene_id,g.stable_id,g.biotype,t.transcript_id, t.stable_id, t.biotype " + "FROM gene g " + "INNER JOIN transcript t " + "WHERE g.gene_id=t.gene_id " + "AND t.biotype like \"%coding\" " + "AND g.biotype in (\'lincRNA\',\'snoRNA\',\'antisense\',\'snRNA\',\'Mt_tRNA\',\'processed_transcript\',\'macro_lncRNA\')"; int rows = DBUtils.getRowCount(con, sql); if (rows != 0) { result = false; try { Statement stmt = con.createStatement(); ResultSet rs = stmt.executeQuery(sql); while (rs != null && rs.next()) { String transcript_stableID = rs.getString(5); String gene_stableID = rs.getString(2); String transcript_biotype = rs.getString(6); String gene_biotype = rs.getString(3); ReportManager.problem(this, con, "Transcripts with stableID " + transcript_stableID + " have coding biotype '" + transcript_biotype + "' that doesn't match with gene with stableID "+ gene_stableID +" non-coding biotype '" + gene_biotype + "'"); } // while rs stmt.close(); } catch (SQLException e) { e.printStackTrace(); } } else { ReportManager.correct(this, con, "There is no biotype mismatch between the genes and transcripts for coding types"); } return result; } // run } // GeneTranscriptBiotypeMismatch