/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.generic;
import java.sql.Connection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
/**
 * Check for presence and format of PFAM hits, and format of others. Also checks for protein features with no
 * hit_name.
 *
 * <p>
 * Three checks are made against the <code>protein_feature</code> and <code>analysis</code> tables:
 * <ol>
 * <li>for core databases only, at least one protein feature must belong to the <code>pfam</code> analysis;</li>
 * <li>for every logic name registered in {@link #formats}, each hit_name must match the registered pattern;</li>
 * <li>no protein feature may have a null or empty hit_name.</li>
 * </ol>
 */
public class Accession extends SingleDatabaseTestCase {

    /**
     * Maps an analysis logic_name to the SQL <code>LIKE</code> pattern that every hit_name of that analysis must
     * match. In a LIKE pattern <code>_</code> stands for exactly one character and <code>%</code> for any run of
     * characters.
     */
    private final HashMap<String, String> formats = new HashMap<String, String>();

    /**
     * Constructor. Sets the description and responsible team, and registers the expected hit_name format for each
     * checked logic name.
     */
    public Accession() {

        setDescription("Check for presence and format of PFAM etc hits");
        setTeamResponsible(Team.GENEBUILD);

        // add to this hash to check for other types and formats
        formats.put("pfam", "PF_____%");
        formats.put("prints", "PR_____");
        formats.put("prosite", "PS_____");
        formats.put("profile", "PS_____");
        formats.put("scanprosite", "PS_____");

    }

    /**
     * Remove the database types this test does not apply to: Sanger Vega, Vega, cDNA, otherfeatures and RNASeq.
     */
    public void types() {

        removeAppliesToType(DatabaseType.SANGER_VEGA);
        removeAppliesToType(DatabaseType.VEGA);
        removeAppliesToType(DatabaseType.CDNA);
        removeAppliesToType(DatabaseType.OTHERFEATURES);
        removeAppliesToType(DatabaseType.RNASEQ);

    }

    /**
     * Check each type of hit.
     *
     * @param dbre
     *          The database to check.
     * @return true if every check passed, false if any problem was reported.
     */
    public boolean run(final DatabaseRegistryEntry dbre) {

        boolean result = true;

        Connection con = dbre.getConnection();

        // check that there is at least one PFAM hit
        // others - prints, prosite etc - may not have any hits
        // only a problem for core databases
        if (dbre.getType() == DatabaseType.CORE) {

            int hits = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM protein_feature pf, analysis a WHERE a.logic_name='pfam' AND a.analysis_id=pf.analysis_id");

            if (hits < 1) {
                result = false;
                ReportManager.problem(this, con, "No proteins with PFAM hits");
            } else {
                ReportManager.correct(this, con, hits + " proteins with PFAM hits");
            }

        }

        // check formats for every registered logic name (pfam included); a failure here does not stop
        // the remaining logic names from being checked
        for (String logicName : formats.keySet()) {

            String pattern = formats.get(logicName);

            logger.fine("Checking for logic name " + logicName + " with hits of format " + pattern);

            // count hit_names of this analysis that do not match the expected pattern
            int badFormat = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM protein_feature pf, analysis a WHERE a.logic_name='" + logicName
                    + "' AND a.analysis_id=pf.analysis_id AND pf.hit_name NOT LIKE '" + pattern + "'");

            if (badFormat > 0) {
                result = false;
                ReportManager.problem(this, con, badFormat + " " + logicName + " hit IDs are not in the correct format");
            }

        }

        // check for protein features with no hit_name, whatever their analysis
        int nullHitIDs = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM protein_feature WHERE hit_name IS NULL OR hit_name=''");

        if (nullHitIDs > 0) {
            result = false;
            ReportManager.problem(this, con, nullHitIDs + " protein features have null or blank hit_names");
        } else {
            ReportManager.correct(this, con, "No protein features have null or blank hit_names");
        }

        return result;

    }

}