/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.generic;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.AbstractTemplatedTestCase;
import org.ensembl.healthcheck.util.DefaultMapRowMapper;
import org.ensembl.healthcheck.util.MapRowMapper;
/**
* Check for mistakes relating to LRGs
*/
public class LRG extends AbstractTemplatedTestCase {

    /**
     * Feature tables inspected by this test. Used just for convenience in
     * this test alone.
     */
    private static enum Feature {
        GENE,
        TRANSCRIPT;

        /**
         * @return The lower-cased constant name; it is spliced into the SQL
         *         of this test as the name of the feature table
         */
        public String getSqlName() {
            // Locale.ROOT keeps the table name stable whatever the default
            // locale is; a Turkish default locale would otherwise lower-case
            // the 'I' of TRANSCRIPT to a dotless i and break the SQL.
            return name().toLowerCase(Locale.ROOT);
        }
    }

    /** Name of the coordinate system that LRG sequence regions are looked up under. */
    public static final String CS_NAME = "lrg";

    /**
     * Creates a new instance of LRG healthcheck
     */
    public LRG() {
        setDescription("Healthcheck for LRGs");
        setTeamResponsible(Team.CORE);
    }

    /**
     * This only applies to core databases: the otherfeatures, vega,
     * sanger_vega, rnaseq and cdna database types are removed from the set
     * of types this test applies to.
     */
    public void types() {
        removeAppliesToType(DatabaseType.OTHERFEATURES);
        removeAppliesToType(DatabaseType.VEGA);
        removeAppliesToType(DatabaseType.SANGER_VEGA);
        removeAppliesToType(DatabaseType.RNASEQ);
        removeAppliesToType(DatabaseType.CDNA);
    }

    /**
     * Runs the LRG tests. When the database holds no LRG sequence regions
     * the test is skipped and counted as a pass.
     *
     * @param dbre Registry entry of the database to check
     * @return true if the test was skipped or every feature check passed
     */
    @Override
    protected boolean runTest(DatabaseRegistryEntry dbre) {
        if (!assertLrgs(dbre)) {
            logger.finest("No LRG seq_regions found, skipping test");
            return true;
        }
        // Short-circuits: transcripts are only checked once genes have passed
        return assertLrgFeatureAnnotations(dbre, Feature.GENE)
                && assertLrgFeatureAnnotations(dbre, Feature.TRANSCRIPT);
    }

    /**
     * Asserts that we have LRG sequence regions in the database
     *
     * @param dbre Registry entry
     * @return true if at least one sequence region is attached to a
     *         coordinate system named {@link #CS_NAME}
     */
    protected boolean assertLrgs(DatabaseRegistryEntry dbre) {
        String sql = "SELECT count(sr.seq_region_id) FROM coord_system cs JOIN seq_region sr using (coord_system_id) WHERE cs.name = ?";
        // Held as an Integer so that a null result is read as "no LRGs"
        // rather than failing during auto-unboxing
        Integer count = getTemplate(dbre).queryForDefaultObject(sql, Integer.class, CS_NAME);
        return count != null && count.intValue() != 0;
    }

    /**
     * Check that the given features are mapped to a coordinate system called
     * lrg and that the biotypes of those linked to an lrg coordinate system
     * contain the text <code>LRG</code>.
     *
     * Three conditions are checked in order and the first failure is
     * reported and ends the check:
     * <ol>
     * <li>features with a biotype like <code>LRG%</code> exist on the lrg
     * coordinate system</li>
     * <li>no such features sit on any other coordinate system</li>
     * <li>every biotype found on the lrg coordinate system contains
     * <code>LRG</code></li>
     * </ol>
     *
     * @param dbre Registry entry
     * @param feature Feature to assert
     * @return Success of the test. If false the error has already been reported
     */
    protected boolean assertLrgFeatureAnnotations(DatabaseRegistryEntry dbre, Feature feature) {
        MapRowMapper<String, Integer> mapper = new DefaultMapRowMapper<String, Integer>(String.class, Integer.class);
        // The table name cannot be a bind parameter; concatenation is safe
        // here because the value only ever comes from the Feature enum
        String featureName = feature.getSqlName();

        //Check that all LRG features are linked to the lrg coordinate system
        Map<String, Integer> lrgGeneCoordinateSystems = getTemplate(dbre).queryForMap(
                "SELECT cs.name, count(*) FROM coord_system cs JOIN seq_region sr using (coord_system_id) join "+featureName+" f using (seq_region_id) WHERE f.biotype LIKE ? GROUP BY cs.name",
                mapper,
                "LRG%");
        if (!lrgGeneCoordinateSystems.containsKey(CS_NAME)) {
            ReportManager.problem(this, dbre.getConnection(), "lrg coordinate system exists but no "+featureName+"(s) are attached");
            return false;
        }

        // Whatever is left after dropping the expected coordinate system is
        // a coordinate system that should not carry LRG biotyped features
        lrgGeneCoordinateSystems.remove(CS_NAME);
        if (!lrgGeneCoordinateSystems.isEmpty()) {
            String badCoordinateSystems = StringUtils.join(lrgGeneCoordinateSystems.keySet(), ',');
            ReportManager.problem(this, dbre.getConnection(), "LRG biotyped "+featureName+"(s) attached to the wrong coordinate systems. ["+badCoordinateSystems+"]");
            return false;
        }

        //Check the inverse that all lrg coord systems contain the right biotype of feature
        Map<String, Integer> lrgCoordinateSystemGenes = getTemplate(dbre).queryForMap(
                "SELECT f.biotype, count(*) FROM coord_system cs JOIN seq_region sr using (coord_system_id) join "+featureName+" f using (seq_region_id) WHERE cs.name = ? GROUP BY f.biotype",
                mapper,
                CS_NAME);
        List<String> misMaps = new ArrayList<String>();
        for (String biotype : lrgCoordinateSystemGenes.keySet()) {
            // A missing biotype is reported as wrong (rendered as "null")
            // instead of aborting the test with a NullPointerException
            if (biotype == null || !biotype.contains("LRG")) {
                misMaps.add(String.valueOf(biotype));
            }
        }
        if (!misMaps.isEmpty()) {
            String badBiotypes = StringUtils.join(misMaps, ',');
            ReportManager.problem(this, dbre.getConnection(), CS_NAME+" coordinate system has the following wrong biotyped "+featureName+"(s) attached ["+badBiotypes+"]");
            return false;
        }
        return true;
    }
} // LRG