/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.generic;
import java.sql.Connection;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Species;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
import org.ensembl.healthcheck.util.Utils;
/**
 * Check that seq_region names are in the expected format for their coordinate system.
 *
 * <p>Clone, contig and scaffold names are checked when the "assembly.accession" meta value
 * contains "GCA"; ancestralsegment names are checked when the species is
 * {@code Species.ANCESTRAL_SEQUENCES}.
 */
public class SeqRegionName extends SingleDatabaseTestCase {

    /**
     * Create a new SeqRegionName testcase.
     */
    public SeqRegionName() {
        // NOTE(review): the description still mentions human and mouse only, while run()
        // is gated on the assembly accession / ancestral species; confirm the wording.
        setDescription("Check that seq_region names for human and mouse are in the right format.");
        setTeamResponsible(Team.GENEBUILD);
    }

    /**
     * Exclude the otherfeatures, estgene, rnaseq and cdna database types from this test.
     * The original rationale given is that the tables are in sync with core, so the data
     * only needs to be tested once.
     */
    public void types() {
        removeAppliesToType(DatabaseType.OTHERFEATURES);
        removeAppliesToType(DatabaseType.ESTGENE);
        removeAppliesToType(DatabaseType.RNASEQ);
        removeAppliesToType(DatabaseType.CDNA);
    }

    /**
     * Run the test.
     *
     * @param dbre
     *          The database to use.
     * @return true if the test passed.
     */
    public boolean run(DatabaseRegistryEntry dbre) {
        boolean result = true;

        Species s = dbre.getSpecies();
        Connection con = dbre.getConnection();

        String assemblyAccession = DBUtils.getMetaValue(con, "assembly.accession");

        // Guard against a missing meta entry so the test does not die with a NullPointerException.
        if (assemblyAccession != null && assemblyAccession.contains("GCA")) {
            // NOTE(review): the contig pattern escapes its first dot with a doubled backslash
            // (as the MySQL REGEXP documentation asks for inside a string literal) while the
            // other patterns do not; left unchanged here - confirm which form is intended.
            result &= seqRegionNameCheck(con, "clone", "^[a-zA-Z]+[0-9]+\\.[0-9]+$");
            // The trailing group previously read [0-9+] (a single digit or a literal '+');
            // it now matches the scaffold pattern's [0-9]+.
            result &= seqRegionNameCheck(con, "contig", "^[a-zA-Z]*[0-9]*(\\\\.[0-9]+)+(\\.[0-9]+)*$");
            result &= seqRegionNameCheck(con, "scaffold", "^[a-zA-Z]*[0-9]*(\\.[0-9]+)+(\\.[0-9]+)*$");
        }

        if (s.equals(Species.ANCESTRAL_SEQUENCES)) {
            result &= seqRegionNameCheck(con, "ancestralsegment", "Ancestor_[0-9]+_[0-9]+$");
        }

        return result;
    } // run

    // ----------------------------------------------------------------------
    /**
     * Check that seq regions of a particular coordinate system are named appropriately.
     * Names starting with "LRG" and the name "MT" are excluded from the check. A single
     * non-matching contig is tolerated when it is a component of the seq_region named "MT".
     *
     * @param con
     *          The database connection to query.
     * @param coordinateSystem
     *          The coord_system name whose seq_regions are checked.
     * @param regexp
     *          The MySQL regular expression that the names must match.
     * @return True if all seq_region names match the regexp (allowing for the MT exception).
     */
    private boolean seqRegionNameCheck(Connection con, String coordinateSystem, String regexp) {
        int rows = DBUtils.getRowCount(con, String.format(
                "SELECT COUNT(*) FROM seq_region sr, coord_system cs WHERE sr.coord_system_id=cs.coord_system_id AND cs.name='%s' AND sr.name NOT LIKE 'LRG%%' AND sr.name NOT LIKE 'MT' AND sr.name NOT REGEXP '%s' ", coordinateSystem,
                regexp));

        if (rows <= 0) {
            ReportManager.correct(this, con, String.format("All seq_regions in coordinate system %s have names in the correct format", coordinateSystem));
            return true;
        }

        if (rows == 1 && coordinateSystem.equals("contig")) {
            // Count non-matching contigs that are assembly components of the MT seq_region.
            int mtContigs = DBUtils.getRowCount(con, String.format(
                    "SELECT COUNT(*) FROM seq_region s1, coord_system cs, seq_region s2, assembly asm WHERE s1.coord_system_id = cs.coord_system_id AND cs.name ='%s' AND s1.seq_region_id = cmp_seq_region_id AND s2.seq_region_id = asm_seq_region_id AND s2.name = 'MT' AND s1.name NOT REGEXP '%s' ", coordinateSystem, regexp));
            if (mtContigs == 1) {
                ReportManager.correct(this, con, "1 MT contig region found with special format");
                return true;
            }
        }

        // Every remaining path is a genuine failure; previously the single-contig/non-MT
        // case reported a problem but still returned true.
        reportBadNames(con, rows, coordinateSystem);
        return false;
    }

    /**
     * Report that some seq_region names in a coordinate system are malformed.
     */
    private void reportBadNames(Connection con, int rows, String coordinateSystem) {
        ReportManager.problem(this, con, String.format("%d seq_regions in coordinate system %s have names that are not of the correct format", rows, coordinateSystem));
    }

    // ----------------------------------------------------------------------
} // SeqRegionName