/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.generic;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.DecimalFormat;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.Priority;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
/**
 * Check for genes with more than one transcript whose transcripts share the same display_xref_id.
 *
 * <p>
 * A gene is counted when at least two of its transcripts (ignoring transcripts with a NULL
 * display_xref_id) carry an identical display_xref_id. The test fails when the counted genes,
 * expressed as a percentage of all genes that have more than one transcript, exceed
 * {@link #THRESHOLD}.
 */
public class TranscriptsSameName extends SingleDatabaseTestCase {

    /**
     * Give an error if more than this percentage of multi-transcript genes have
     * identically-named transcripts.
     */
    private static final int THRESHOLD = 60;

    /**
     * Create a new TranscriptsSameName testcase.
     */
    public TranscriptsSameName() {

        setDescription(" Check for genes with more than one transcript where all the transcripts have the same display_xref_id.");
        setPriority(Priority.AMBER);
        setEffect("Web display and all other uses of xrefs are broken");
        setFix("Recalculate display xrefs");
        setTeamResponsible(Team.CORE);

    }

    /**
     * Run the test.
     *
     * @param dbre
     *          The database to use.
     * @return true if the test passed; false if the threshold was exceeded or the database could
     *         not be queried.
     */
    public boolean run(DatabaseRegistryEntry dbre) {

        boolean result = true;

        Connection con = dbre.getConnection();

        // first get total number of genes that have more than one transcript
        // note we have to force the use DBUtils.getRowCountFast here because of the nature
        // of the query
        int totalGenes = DBUtils.getRowCountFast(con, "SELECT COUNT(1) FROM (SELECT g.gene_id FROM gene g, transcript t WHERE t.gene_id=g.gene_id GROUP BY g.gene_id HAVING COUNT(*) > 1) AS c");

        try {

            int sameNameTranscriptCount = countGenesWithSharedDisplayXref(con);

            // Guard the division: with no multi-transcript genes 0/0 would yield NaN, which can
            // never exceed the threshold and would be printed as a meaningless percentage.
            // NOTE(review): a non-positive total is treated as "nothing to check" - confirm
            // whether DBUtils.getRowCountFast uses a negative value to signal a failed query.
            double percentage = 0.0;
            if (totalGenes > 0) {
                percentage = 100 * ((double) sameNameTranscriptCount / (double) totalGenes);
            }

            String percentageStr = new DecimalFormat("##.#").format(percentage);

            if (percentage > THRESHOLD) {
                ReportManager.problem(this, con, percentageStr + "% of genes with more than one transcript have identically-named transcripts");
                result = false;
            } else {
                ReportManager.correct(this, con, "Only " + percentageStr + "% genes with more than one transcript have identically-named transcripts");
            }

        } catch (SQLException e) {

            // A check that could not be executed must not be reported as passed.
            e.printStackTrace();
            ReportManager.problem(this, con, "Could not check for identically-named transcripts: " + e.getMessage());
            result = false;

        }

        return result;

    } // run

    /**
     * Count the genes in which at least two transcripts share the same (non-NULL) display_xref_id.
     *
     * @param con
     *          The connection to query.
     * @return The number of genes with at least one pair of identically-named transcripts.
     * @throws SQLException
     *           If the query cannot be executed or its results cannot be read.
     */
    private int countGenesWithSharedDisplayXref(Connection con) throws SQLException {

        Statement stmt = null;

        try {

            stmt = con.createStatement();

            // Rows are ordered by gene and then by display xref so that transcripts of one gene
            // sharing an xref are always adjacent; the loop below only compares neighbouring
            // rows, so without the secondary sort key the count would depend on row order.
            ResultSet rs = stmt.executeQuery("SELECT g.gene_id, t.transcript_id, t.display_xref_id FROM gene g, transcript t WHERE t.gene_id=g.gene_id AND t.display_xref_id IS NOT NULL ORDER BY g.gene_id, t.display_xref_id");

            long previousGeneID = -1;
            long previousDisplayXrefID = -1;
            long lastCountedGeneID = -1;
            int sameNameTranscriptCount = 0;

            while (rs.next()) {

                long geneID = rs.getLong(1);
                long displayXrefID = rs.getLong(3);

                // lastCountedGeneID ensures each gene contributes at most once, however many
                // of its transcripts share a name
                if (geneID == previousGeneID && displayXrefID == previousDisplayXrefID && lastCountedGeneID != geneID) {
                    sameNameTranscriptCount++;
                    lastCountedGeneID = geneID;
                }

                previousGeneID = geneID;
                previousDisplayXrefID = displayXrefID;

            } // while rs

            return sameNameTranscriptCount;

        } finally {

            // Always release the statement, also when the query or iteration fails; closing a
            // Statement also closes its current ResultSet.
            if (stmt != null) {
                try {
                    stmt.close();
                } catch (SQLException ignored) {
                    // a failure to close must not mask the outcome of the check itself
                }
            }

        }

    } // countGenesWithSharedDisplayXref

} // TranscriptsSameName