/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* InterproHitCount
*
* @author dstaines
* @author $Author$
* @version $Revision$
*/
package org.ensembl.healthcheck.testcase.eg_core;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.util.SqlTemplate;
/**
* Test to check that, for each species, at least a minimum fraction of protein-coding genes have at least one InterPro hit
*
* @author dstaines
*
*/
public class InterproHitCount extends AbstractEgCoreTestCase {

    /**
     * Counts the distinct protein_coding genes of one species. The single
     * placeholder is the species_id, reached via seq_region and coord_system.
     */
    private static final String GENE_COUNT_SQL = "select count(distinct(gene.gene_id)) from gene "
            + "join seq_region using (seq_region_id) " + "join coord_system using (coord_system_id) "
            + "where gene.biotype='protein_coding' and species_id=?";

    /**
     * Counts the distinct protein_coding genes of one species that have at
     * least one protein_feature whose hit_name matches an interpro id. The
     * single placeholder is the species_id.
     */
    private static final String IPR_COUNT_SQL = "select count(distinct(gene.gene_id)) from gene "
            + "join seq_region using (seq_region_id) " + "join coord_system using (coord_system_id) "
            + "join transcript using (gene_id) " + "join translation using (transcript_id) "
            + "join protein_feature using (translation_id) " + "join interpro on (id=hit_name) "
            + "where gene.biotype='protein_coding' and species_id=?";

    /**
     * Minimum fraction (0-1) of protein_coding genes that must have an
     * interpro hit for a species to pass.
     */
    private static final double THRESHOLD = 0.5;

    public InterproHitCount() {
        super();
    }

    /**
     * For every species in the database, compares the number of
     * protein_coding genes with at least one interpro hit against the total
     * number of protein_coding genes and reports a problem when the fraction
     * is below {@link #THRESHOLD}.
     *
     * @param dbre
     *            database to check
     * @return true if no species fell below the threshold, false otherwise
     * @see org.ensembl.healthcheck.testcase.AbstractTemplatedTestCase#runTest(org.ensembl.healthcheck.DatabaseRegistryEntry)
     */
    @Override
    protected boolean runTest(DatabaseRegistryEntry dbre) {
        boolean result = true;
        SqlTemplate temp = getSqlTemplate(dbre);
        for (int speciesId : dbre.getSpeciesIds()) {
            // count number of protein_coding genes
            int geneN = temp.queryForDefaultObject(GENE_COUNT_SQL,
                    Integer.class, speciesId);
            if (geneN == 0) {
                // With no genes the ratio would be 0.0/0 = NaN, and
                // NaN < THRESHOLD is false, so the species would only pass
                // by accident. Make the "nothing to assess" case explicit:
                // it is still not a failure, but it is reported as such.
                ReportManager.info(this, dbre.getConnection(),
                        "No protein_coding genes found for species "
                                + speciesId
                                + " - interpro coverage not assessed");
                continue;
            }
            // count number of genes with at least 1 interpro hit
            int iprN = temp.queryForDefaultObject(IPR_COUNT_SQL, Integer.class,
                    speciesId);
            // cast first so the division is floating point, not integer
            double ratio = (double) iprN / geneN;
            if (ratio < THRESHOLD) {
                ReportManager.problem(this, dbre.getConnection(), iprN
                        + " protein_coding genes of a total of " + geneN
                        + " for species " + speciesId
                        + " have at least one interpro domain -"
                        + " this is less than the suggested threshold of "
                        + THRESHOLD * 100 + "%");
                result = false;
            } else {
                ReportManager.info(this, dbre.getConnection(), iprN
                        + " protein_coding genes of a total of " + geneN
                        + " for species " + speciesId
                        + " have at least one interpro domain");
            }
        }
        return result;
    }

    /**
     * Describes this test, including the threshold expressed as a percentage.
     *
     * @return human-readable description of the test
     * @see org.ensembl.healthcheck.testcase.eg_core.AbstractEgCoreTestCase#getEgDescription()
     */
    @Override
    protected String getEgDescription() {
        return "Test to check that at least " + THRESHOLD * 100
                + "% of protein coding genes have at least one interpro hit";
    }
}