/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.generic;
import java.sql.Connection;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
/**
* Check that all transcripts of genes of protein coding genes translate.
*/
public class TranscriptsTranslate extends SingleDatabaseTestCase {
/**
* Create a new TranscriptsTranslate testcase.
*/
public TranscriptsTranslate() {
setDescription("Check that all protein_coding transcripts translate");
setTeamResponsible(Team.GENEBUILD);
}
/**
* This only really applies to core databases
*/
public void types() {
removeAppliesToType(DatabaseType.OTHERFEATURES);
removeAppliesToType(DatabaseType.CDNA);
removeAppliesToType(DatabaseType.RNASEQ);
}
/**
* Run the test.
*
* @param dbre
* The database to use.
* @return true if the test passed.
*
*/
public boolean run(DatabaseRegistryEntry dbre) {
boolean result = true;
Connection con = dbre.getConnection();
String sql = "SELECT COUNT(*) FROM transcript tr LEFT JOIN translation t ON t.transcript_id=tr.transcript_id WHERE t.translation_id IS NULL AND tr.biotype=\'protein_coding\'";
// protein_coding genes for sangervega only need at least 1 transcript with a translation, they can have transcripts without
// translations in addition to that
// for sangervega ignore genes that do not have source havana or WU
if (dbre.getType() == DatabaseType.SANGER_VEGA || dbre.getType() == DatabaseType.VEGA) {
// sql +=" and (g.source='havana' or g.source='WU')";
sql = "select count(*) from gene g left join ( transcript t join translation p using (transcript_id) ) using (gene_id) where t.transcript_id is null and g.biotype = 'protein_coding' and g.gene_id not in (select gene_id from gene_attrib join attrib_type using (attrib_type_id) where code = 'NoTransRefError') and (g.source='havana' or g.source='WU')";
}
int rows = DBUtils.getRowCount(con, sql);
if (rows != 0) {
ReportManager.problem(this, con, rows + " protein_coding transcript(s) do not have translations.");
result = false;
} else {
ReportManager.correct(this, con, "All protein_coding transcripts have translations");
}
return result;
} // run
} // TranscriptsTranslate