/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.generic;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
/**
* An EnsEMBL Healthcheck test case which checks for duplicate exons, i.e. exons with different IDs that share the same
* seq_region, start, end, strand, phase and end phase.
*/
public class DuplicateExons extends SingleDatabaseTestCase {

    /** Maximum number of individual duplicate-exon warnings emitted; further duplicates are only counted. */
    private static final int MAX_WARNINGS = 10;

    /**
     * Create a DuplicateExons test case and assign it to the genebuild team.
     */
    public DuplicateExons() {
        setTeamResponsible(Team.GENEBUILD);
    }

    /**
     * Exclude otherfeatures, cDNA, Sanger Vega and RNA-seq databases from this test.
     */
    public void types() {
        removeAppliesToType(DatabaseType.OTHERFEATURES);
        removeAppliesToType(DatabaseType.CDNA);
        removeAppliesToType(DatabaseType.SANGER_VEGA);
        removeAppliesToType(DatabaseType.RNASEQ);
    }

    /**
     * Check for duplicate exons.
     *
     * Exon rows are read ordered by seq_region, gene, strand, start, end, phase and end phase, so exons of the same
     * gene with identical coordinates arrive on consecutive rows. Each pair of consecutive rows that matches on all
     * of those fields but carries a different exon ID is counted as a duplicate. The first MAX_WARNINGS duplicates
     * are reported individually as warnings, and a single problem line gives the total count.
     *
     * @param dbre
     *          The database to check.
     * @return True if the test passes, i.e. no duplicates were found and the query ran without error.
     */
    public boolean run(DatabaseRegistryEntry dbre) {
        boolean result = true;
        String sql = "SELECT e.exon_id, e.phase, e.seq_region_start AS start, e.seq_region_end AS end, e.seq_region_id AS chromosome_id, e.end_phase, e.seq_region_strand AS strand "
                + ", t.gene_id AS gene_id " + " FROM exon e, exon_transcript et, transcript t " + " WHERE e.exon_id=et.exon_id and et.transcript_id = t.transcript_id "
                + " ORDER BY chromosome_id, gene_id, strand, start, end, phase, end_phase";
        Connection con = dbre.getConnection();
        Statement stmt = null;
        ResultSet rs = null;
        try {
            stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
            stmt.setFetchSize(1000);
            rs = stmt.executeQuery(sql);

            int lastExonStart = -1;
            int lastExonEnd = -1;
            int lastExonPhase = -1;
            int lastExonChromosome = -1;
            int lastExonEndPhase = -1;
            int lastExonStrand = -1;
            int lastExonID = -1;
            int lastExonGeneId = -1;
            int duplicateExon = 0;
            boolean first = true;

            while (rs.next()) {
                // load the vars
                int exonID = rs.getInt("exon_id");
                int exonPhase = rs.getInt("phase");
                int exonStart = rs.getInt("start");
                int exonEnd = rs.getInt("end");
                int exonChromosome = rs.getInt("chromosome_id");
                int exonEndPhase = rs.getInt("end_phase");
                int exonStrand = rs.getInt("strand");
                int exonGeneId = rs.getInt("gene_id");

                if (!first) {
                    // Rows are sorted by gene, so only exons of the SAME gene can be meaningfully compared with
                    // their predecessor. The exon ID test skips consecutive rows for one exon, which the join
                    // yields once per transcript using it.
                    boolean sameLocation = lastExonChromosome == exonChromosome && lastExonStart == exonStart && lastExonEnd == exonEnd
                            && lastExonStrand == exonStrand;
                    boolean samePhases = lastExonPhase == exonPhase && lastExonEndPhase == exonEndPhase;
                    if (sameLocation && samePhases && lastExonGeneId == exonGeneId && lastExonID != exonID) {
                        duplicateExon++;
                        if (duplicateExon <= MAX_WARNINGS) {
                            ReportManager.warning(this, con, "Exon " + exonID + " in gene " + exonGeneId + " is a duplicate of exon " + lastExonID);
                        }
                    }
                } else {
                    first = false;
                }

                lastExonStart = exonStart;
                lastExonEnd = exonEnd;
                lastExonChromosome = exonChromosome;
                lastExonPhase = exonPhase;
                lastExonEndPhase = exonEndPhase;
                lastExonStrand = exonStrand;
                lastExonID = exonID;
                lastExonGeneId = exonGeneId;
            } // while rs

            if (duplicateExon > 0) {
                ReportManager.problem(this, con, "Has at least " + duplicateExon + " duplicated exons.");
                result = false;
            }
        } catch (Exception e) {
            result = false;
            e.printStackTrace();
        } finally {
            // Release the JDBC resources on the failure path as well as on success.
            closeQuietly(rs, stmt);
        }
        // EG write correct report line if all OK
        if (result) {
            ReportManager.correct(this, con, "No duplicate exons found");
        }
        return result;
    }

    /**
     * Close the result set and then the statement, tolerating nulls; a failure to close is printed but does not
     * change the test outcome.
     */
    private static void closeQuietly(ResultSet rs, Statement stmt) {
        if (rs != null) {
            try {
                rs.close();
            } catch (java.sql.SQLException e) {
                e.printStackTrace();
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (java.sql.SQLException e) {
                e.printStackTrace();
            }
        }
    }
} // DuplicateExons