/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck.testcase.compara; import java.util.List; import java.util.HashMap; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.DatabaseType; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.compara.AbstractComparaTestCase; import org.ensembl.healthcheck.util.DBUtils; public class CheckSpeciesSetSizeByMethod extends AbstractComparaTestCase { // Maps method_link_species_set_id to size protected HashMap<String,String> sizeExceptions = new HashMap<String,String>(); public CheckSpeciesSetSizeByMethod() { setTeamResponsible(Team.COMPARA); appliesToType(DatabaseType.COMPARA); setDescription("Checks that the species-sets have the expected number of genomes"); } public boolean run(DatabaseRegistryEntry dbre) { populateExceptions(dbre); boolean result = true; result &= assertSpeciesSetCountForMLSS(dbre, "ENSEMBL_ORTHOLOGUES", 2); if (! isMasterDB(dbre.getConnection())) { // In the master db, we still have between-species paralogues result &= assertSpeciesSetCountForMLSS(dbre, "ENSEMBL_PARALOGUES", 1); } result &= assertSpeciesSetCountForMLSS(dbre, "ENSEMBL_HOMOEOLOGUES", 1); result &= assertSpeciesSetCountForMLSS(dbre, "BLASTZ_NET", 2); result &= assertSpeciesSetCountForMLSS(dbre, "LASTZ_NET", 2); result &= assertSpeciesSetCountForMLSS(dbre, "TRANSLATED_BLAT_NET", 2); return result; } protected void populateExceptions(DatabaseRegistryEntry dbre) { String sql = "SELECT method_link_species_set_id, value FROM method_link_species_set_tag WHERE tag = 'species_set_size'"; for(String[] a : DBUtils.getRowValuesList(dbre.getConnection(), sql)) { sizeExceptions.put(a[0], a[1]); } } protected boolean assertSpeciesSetCountForMLSS(DatabaseRegistryEntry dbre, String methodLinkType, int expectedCount) { String sql = String.format( "SELECT method_link_species_set_id, name, COUNT(*) AS cnt" + " FROM method_link_species_set JOIN method_link USING (method_link_id) JOIN species_set USING (species_set_id)" + " WHERE type = '%s'" + " GROUP BY method_link_species_set_id" + " HAVING COUNT(*) != %d", methodLinkType, expectedCount); List <String[]> badMLSSs = DBUtils.getRowValuesList(dbre.getConnection(), sql); boolean result = true; for (String [] thisBadMLSS : badMLSSs) { String expected = Integer.toString(expectedCount); if (sizeExceptions.containsKey(thisBadMLSS[0])) { expected = sizeExceptions.get(thisBadMLSS[0]); if (thisBadMLSS[2].equals(expected)) { continue; } } result = false; ReportManager.problem(this, dbre.getConnection(), String.format("The MLSS '%s' (ID %s) has %s GenomeDBs in its species-set instead of %s", thisBadMLSS[1], thisBadMLSS[0], thisBadMLSS[2], expected)); } return result; } }