/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Copyright (C) 2003 EBI, GRL
*
* This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.ensembl.healthcheck.testcase.generic;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.Priority;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
import org.ensembl.healthcheck.util.SqlTemplate;
/**
* Check that the gene and transcript biotypes match the valid current ones in the production database.
*/
public class ProductionBiotypes extends SingleDatabaseTestCase {
/**
* Constructor.
*/
public ProductionBiotypes() {
setDescription("Check that the gene and transcript biotypes match the valid current ones in the production database.");
setPriority(Priority.AMBER);
setEffect("Unknown/incorrect biotypes.");
setTeamResponsible(Team.GENEBUILD);
}
/**
* This test Does not apply to sangervega dbs
*/
public void types() {
removeAppliesToType(DatabaseType.SANGER_VEGA);
}
/**
* Run the test.
*
* @param dbre
* The database to use.
* @return true if the test passed.
*
*/
public boolean run(DatabaseRegistryEntry dbre) {
String databaseType = dbre.getType().getName(); // will be core, otherfeatures etc
Set<String> coreBiotypes = getBiotypesDb(dbre, new String[]{"gene", "transcript"});
Set<String> productionBiotypes = getBiotypesProduction(dbre, databaseType);
return checkBiotypeExists(dbre, coreBiotypes, productionBiotypes, "production");
}
private <T extends CharSequence> boolean checkBiotypeExists(DatabaseRegistryEntry dbre, Collection<T> core, Collection<T> production, String type) {
Set<T> missing = new HashSet<T>(core);
missing.removeAll(production);
if(missing.isEmpty()) {
ReportManager.correct(this, dbre.getConnection(), "Set of biotypes matches the current valid list in the production database.");
return true;
}
for(CharSequence name: missing) {
String msg = String.format("The biotype '%s' is missing from %s", name, type);
ReportManager.problem(this, dbre.getConnection(), msg);
}
return false;
}
private Set<String> getBiotypesDb(DatabaseRegistryEntry dbre, String[] tables) {
SqlTemplate t = DBUtils.getSqlTemplate(dbre);
Set<String> results = new HashSet<String>();
for (String table : tables) {
String sql = "SELECT DISTINCT(biotype) FROM " + table;
results.addAll(t.queryForDefaultObjectList(sql, String.class));
}
return results;
}
private Set<String> getBiotypesProduction(DatabaseRegistryEntry dbre, String databaseType) {
SqlTemplate t = DBUtils.getSqlTemplate(getProductionDatabase());
String[] tables = { "gene", "transcript" };
Set<String> results = new HashSet<String>();
for (String table : tables) {
String sql = "SELECT name FROM biotype WHERE object_type='" + table + "' AND is_current = 1 AND FIND_IN_SET('" + databaseType + "', db_type) > 0";
results.addAll(t.queryForDefaultObjectList(sql, String.class));
}
return results;
}
}