/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.generic;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Map;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Species;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.Priority;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
/**
* Check for certain combinations of logic name and transcript name.
*/
public class TranscriptNames extends SingleDatabaseTestCase {

    /**
     * Counts, for one logic name, the transcripts whose display xref label does
     * NOT match a regular expression. Parameter 1 is the logic name, parameter 2
     * is the regular expression (evaluated by the database via NOT REGEXP).
     */
    private static final String MISMATCH_COUNT_SQL = "SELECT COUNT(*) FROM analysis a, transcript t, xref x WHERE a.analysis_id=t.analysis_id AND t.display_xref_id=x.xref_id AND a.logic_name=? AND x.display_label NOT REGEXP ?";

    /**
     * Constructor. Sets the description, priority, effect and responsible teams
     * of this test case.
     */
    public TranscriptNames() {
        setDescription("Check for certain combinations of logic name and transcript name.");
        setPriority(Priority.AMBER);
        setEffect("Transcript names do not match the logic names.");
        setTeamResponsible(Team.CORE);
        setSecondTeamResponsible(Team.GENEBUILD);
    }

    /**
     * Run the test.
     *
     * For every logic name in the table below, counts the transcripts whose
     * display label does not match the associated pattern and reports a problem
     * when that count is greater than zero.
     *
     * @param dbre
     *          The database to use.
     * @return true if the test passed (or was skipped because
     *         {@code isMerged} returned false for the species); false if any
     *         logic name had mismatching transcript names or if the database
     *         could not be queried.
     */
    public boolean run(DatabaseRegistryEntry dbre) {
        // isMerged(...) is defined outside this class; the original comment on
        // this guard reads "only valid in human, mouse and zebrafish".
        Species species = dbre.getSpecies();
        if (!isMerged(species)) {
            return true;
        }

        // Hash of logic_names to the patterns that transcript names should match
        Map<String, String> logicNameRegexp = new HashMap<String, String>();
        logicNameRegexp.put("ensembl", "-2[0-9][0-9]$");
        logicNameRegexp.put("ensembl_havana_transcript", "-0[0-9][0-9]$");
        logicNameRegexp.put("havana", "-0[0-9][0-9]$");
        //logicNameRegexp.put("ncrna", "-2[0-9][0-9]$");
        logicNameRegexp.put("ensembl_ig_gene", "-2[0-9][0-9]$");
        logicNameRegexp.put("havana_ig_gene", "-0[0-9][0-9]$");
        logicNameRegexp.put("ncrna_pseudogene", "-2[0-9][0-9]$");
        logicNameRegexp.put("mt_genbank_import", "-2[0-9][0-9]$");
        logicNameRegexp.put("lrg_import", "LRG_[0-9]+t[0-9]+$");
        logicNameRegexp.put("ensembl_lincrna", "-2[0-9][0-9]$");

        boolean result = true;
        // The connection is obtained from dbre and is deliberately not closed
        // here, matching the original code.
        Connection con = dbre.getConnection();

        try {
            PreparedStatement stmt = con.prepareStatement(MISMATCH_COUNT_SQL);
            try {
                for (Map.Entry<String, String> entry : logicNameRegexp.entrySet()) {
                    String logicName = entry.getKey();
                    String regexp = entry.getValue();

                    int rows = countMismatches(stmt, logicName, regexp);
                    if (rows > 0) {
                        result = false;
                        ReportManager.problem(this, con, String.format("%d transcripts with logic name %s have names which don't match the required pattern (%s)", rows, logicName, regexp));
                    } else {
                        ReportManager.correct(this, con, String.format("All transcripts with logic name %s have correct names", logicName));
                    }
                }
            } finally {
                // Always release the statement, even when a query fails.
                stmt.close();
            }
        } catch (SQLException se) {
            // A check that could not be executed must not be reported as a pass.
            se.printStackTrace();
            result = false;
            ReportManager.problem(this, con, "Could not check transcript names: " + se.getMessage());
        }

        return result;
    }

    /**
     * Executes the prepared mismatch-count query for one logic name.
     *
     * @param stmt
     *          Statement prepared from {@link #MISMATCH_COUNT_SQL}.
     * @param logicName
     *          Value bound to the first placeholder (analysis logic name).
     * @param regexp
     *          Value bound to the second placeholder (required name pattern).
     * @return the value of the single COUNT(*) column.
     * @throws SQLException
     *           if the query fails or returns no row.
     */
    private int countMismatches(PreparedStatement stmt, String logicName, String regexp) throws SQLException {
        stmt.setString(1, logicName);
        stmt.setString(2, regexp);

        ResultSet rs = stmt.executeQuery();
        try {
            // next() works on forward-only result sets; first() is only
            // specified for scrollable ones.
            if (!rs.next()) {
                throw new SQLException("Count query returned no row for logic name " + logicName);
            }
            return rs.getInt(1);
        } finally {
            rs.close();
        }
    }

} // TranscriptNames