/*
 * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 * Copyright [2016-2017] EMBL-European Bioinformatics Institute
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ensembl.healthcheck.testcase.generic;

import java.sql.Connection;
import java.util.Collection;
import java.util.List;
import java.util.Map;

import org.apache.commons.collections.CollectionUtils;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.Priority;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
import org.ensembl.healthcheck.util.SqlTemplate;
import org.ensembl.healthcheck.util.StringListMapRowMapper;

/**
 * Compares the species naming meta data held in a database (the
 * {@code species.alias}, {@code species.production_name}, {@code species.url}
 * and {@code species.scientific_name} rows of its {@code meta} table) with
 * the {@code species} and {@code species_alias} tables of the production
 * database, and checks that the values are formatted as expected.
 */
public class ProductionSpeciesAlias extends SingleDatabaseTestCase {

    /** Expected format of species.url: leading capital, underscore separated. */
    private static final String URL_FORMAT = "^[A-Z]{1}[a-z0-9]*(_[a-zA-Z0-9]*)+";

    /** Expected format of species.production_name: lower case, digits, underscores. */
    private static final String PRODUCTION_NAME_FORMAT = "^[a-z0-9_]*$";

    /** Expected format of species.scientific_name: leading capital, space separated. */
    private static final String SCIENTIFIC_NAME_FORMAT = "^[A-Z]{1}[a-z0-9]*( [a-z0-9]*){1,2}";

    /**
     * Sets the description, priority, effect, fix and responsible teams
     * reported for this test.
     */
    public ProductionSpeciesAlias() {
        setDescription("Check that all the species aliases in production are also in the core databases");
        setPriority(Priority.AMBER);
        setEffect("Missing aliases can not be searched for");
        setFix("Re-run populate_species_meta.pl script");
        setTeamResponsible(Team.GENEBUILD);
        setSecondTeamResponsible(Team.RELEASE_COORDINATOR);
    }

    /**
     * Removes {@link DatabaseType#SANGER_VEGA} from the database types this
     * test applies to.
     */
    public void types() {
        removeAppliesToType(DatabaseType.SANGER_VEGA);
    }

    /**
     * Runs the checks against one database.
     *
     * The production name is verified first; the remaining checks (aliases in
     * both directions, URL name, scientific name and overall consistency) are
     * only attempted when it is valid. All of the remaining checks are always
     * executed so that every problem gets reported in a single run.
     *
     * @param dbre
     *            the database to check
     * @return true if every check passed
     */
    @Override
    public boolean run(DatabaseRegistryEntry dbre) {

        String species = dbre.getSpecies().toString();
        DatabaseRegistryEntry prodDbre = getProductionDatabase();

        // Nothing else can be compared reliably if the production name is wrong
        if (!checkName(dbre, prodDbre, species)) {
            return false;
        }

        Connection con = dbre.getConnection();
        List<String> dbAliases = DBUtils.getColumnValuesList(con,
                "SELECT DISTINCT(meta_value) FROM meta WHERE meta_key = 'species.alias'");

        // The species name is bound as a parameter rather than concatenated
        // into the statement, as the other production queries in this class do
        String prodAliasSql = "SELECT sa.alias FROM species_alias sa, species s "
                + "WHERE s.species_id = sa.species_id "
                + "AND s.db_name = ? AND "
                + "s.is_current = 1 AND sa.is_current = 1";
        List<String> productionAliases = DBUtils.getSqlTemplate(prodDbre)
                .queryForDefaultObjectList(prodAliasSql, String.class, species);

        boolean result = true;

        // Looking for aliases which have been added directly into the species
        // database. These should always be added to the production database,
        // then synced across using the populate_species_meta script
        result &= checkHasAlias(dbre, dbAliases, productionAliases, "production");

        // Looking for aliases which are missing from the species database.
        // This means the populate_species_meta script has not been run since
        // the entry was added to the production database. The
        // populate_species_meta script is located in
        // ensembl/misc-scripts/production_database/scripts
        result &= checkHasAlias(dbre, productionAliases, dbAliases, "species");

        result &= checkUrl(dbre, prodDbre, species);
        result &= checkScientific(dbre, prodDbre, species);
        result &= checkConsistent(dbre, species);

        return result;
    }

    /**
     * Reports every alias present in {@code core} but absent from
     * {@code toRemove}.
     *
     * @param dbre
     *            database the problems are reported against
     * @param core
     *            aliases that are expected to be present everywhere
     * @param toRemove
     *            aliases found in the other database
     * @param type
     *            name of the other database, used in the problem message
     * @return true if no alias is missing
     */
    private boolean checkHasAlias(DatabaseRegistryEntry dbre, Collection<String> core, Collection<String> toRemove,
            String type) {
        // The cast is kept from the original code; both inputs hold Strings,
        // so the difference can only hold Strings as well
        @SuppressWarnings("unchecked")
        Collection<String> missing = (Collection<String>) CollectionUtils.subtract(core, toRemove);

        for (String alias : missing) {
            String msg = String.format("Species alias '%s' is not in the %s database", alias, type);
            ReportManager.problem(this, dbre.getConnection(), msg);
        }
        return missing.isEmpty();
    }

    /**
     * Checks the species URL name. It should be the same in the production
     * and the core databases, start with a capital letter and have
     * underscores between the names.
     *
     * @return true if the URL name matches production and is well formed
     */
    private boolean checkUrl(DatabaseRegistryEntry dbre, DatabaseRegistryEntry prodDbre, String species) {
        String url = DBUtils.getSqlTemplate(dbre)
                .queryForDefaultObject("SELECT meta_value FROM meta WHERE meta_key = 'species.url'", String.class);
        List<String> prodUrls = DBUtils.getSqlTemplate(prodDbre)
                .queryForDefaultObjectList("SELECT url_name FROM species WHERE db_name = ?", String.class, species);

        if (prodUrls.isEmpty()) {
            ReportManager.problem(this, dbre.getConnection(),
                    "Species " + species + " not found in the production database");
            return false;
        }

        return checkMetaValue(dbre, "species.url", url, prodUrls.get(0), URL_FORMAT,
                "Should start with a capital letter and have underscores to separate names");
    }

    /**
     * Checks the species production name. It should be the same in the
     * production and the core databases and contain only lower case
     * characters and underscores. The species must have exactly one entry in
     * the production database.
     *
     * @return true if the production name matches production and is well
     *         formed
     */
    private boolean checkName(DatabaseRegistryEntry dbre, DatabaseRegistryEntry prodDbre, String species) {
        String name = DBUtils.getSqlTemplate(dbre).queryForDefaultObject(
                "SELECT meta_value FROM meta WHERE meta_key = 'species.production_name'", String.class);
        List<String> prodNames = DBUtils.getSqlTemplate(prodDbre).queryForDefaultObjectList(
                "SELECT production_name FROM species WHERE db_name = ?", String.class, species);

        if (prodNames.isEmpty()) {
            ReportManager.problem(this, dbre.getConnection(),
                    "species " + species + " not found in production database");
            return false;
        }
        if (prodNames.size() > 1) {
            ReportManager.problem(this, dbre.getConnection(),
                    "Multiple entries for species " + species + " found in production database");
            return false;
        }

        return checkMetaValue(dbre, "species.production_name", name, prodNames.get(0), PRODUCTION_NAME_FORMAT,
                "It should only contain lower case caracters and underscores");
    }

    /**
     * Checks the species scientific name. It should be the same in the
     * production and the core databases, start with a capital letter and have
     * spaces between the names.
     *
     * @return true if the scientific name matches production and is well
     *         formed
     */
    private boolean checkScientific(DatabaseRegistryEntry dbre, DatabaseRegistryEntry prodDbre, String species) {
        String scientificName = DBUtils.getSqlTemplate(dbre).queryForDefaultObject(
                "SELECT meta_value FROM meta WHERE meta_key = 'species.scientific_name'", String.class);
        String prodScientificName = DBUtils.getSqlTemplate(prodDbre).queryForDefaultObject(
                "SELECT scientific_name FROM species WHERE db_name = ?", String.class, species);

        // The hint now describes the format that is actually enforced
        // (spaces), and the mismatch message uses the full meta key
        return checkMetaValue(dbre, "species.scientific_name", scientificName, prodScientificName,
                SCIENTIFIC_NAME_FORMAT, "Should start with a capital letter and have spaces to separate names");
    }

    /**
     * Shared comparison used by the URL, production name and scientific name
     * checks: the core value must equal the production value and match the
     * given format.
     *
     * @param dbre
     *            database the outcome is reported against
     * @param metaKey
     *            meta key being checked, used as the message prefix
     * @param coreValue
     *            value read from the meta table of the checked database
     * @param prodValue
     *            value read from the production database
     * @param format
     *            regular expression the whole value has to match
     * @param formatHint
     *            human readable description of the format, appended to the
     *            problem message
     * @return true if the values are equal and correctly formatted
     */
    private boolean checkMetaValue(DatabaseRegistryEntry dbre, String metaKey, String coreValue, String prodValue,
            String format, String formatHint) {
        // Defensive: avoids a NullPointerException below should the core
        // query ever yield no value
        if (coreValue == null) {
            ReportManager.problem(this, dbre.getConnection(), metaKey + " has no value in the meta table");
            return false;
        }
        if (!coreValue.equals(prodValue)) {
            ReportManager.problem(this, dbre.getConnection(), metaKey + " '" + coreValue
                    + "' in database does not match '" + prodValue + "' in the production database");
            return false;
        }
        if (!coreValue.matches(format)) {
            ReportManager.problem(this, dbre.getConnection(),
                    metaKey + " '" + coreValue + "' is not in the correct format. " + formatHint);
            return false;
        }
        ReportManager.correct(this, dbre.getConnection(),
                metaKey + " '" + coreValue + "' is the same in both databases and is in the correct format");
        return true;
    }

    /**
     * Checks that all the species meta keys are consistent: once
     * capitalisation and separators (underscores and spaces) are removed, the
     * scientific name, production name and URL name should all be the same as
     * the species name.
     *
     * NOTE(review): the original condition compared two StringBuilder
     * instances with equals(), which is an identity comparison, so this check
     * could never report anything. Now that it is effective it may surface
     * mismatches that went unnoticed before - confirm against real data.
     *
     * @return true if every value reduces to the species name
     */
    private boolean checkConsistent(DatabaseRegistryEntry dbre, String species) {
        boolean result = true;
        SqlTemplate t = DBUtils.getSqlTemplate(dbre);

        String shortName = stripSeparators(species);

        // meta_value -> meta_key(s), only used to name the offending key
        String allSql = "SELECT meta_value, meta_key FROM meta WHERE meta_key in ('species.scientific_name', 'species.production_name', 'species.url')";
        Map<String, List<String>> keys = t.queryForMap(allSql, new StringListMapRowMapper());

        String sql = "SELECT meta_value FROM meta WHERE meta_key in ('species.scientific_name', 'species.production_name', 'species.url')";
        List<String> names = t.queryForDefaultObjectList(sql, String.class);

        for (String name : names) {
            // Compare String contents; a problem is a value that differs
            if (!stripSeparators(name).equals(shortName)) {
                ReportManager.problem(this, dbre.getConnection(), keys.get(name)
                        + " has a meta value which does not match the correct species name " + species);
                result = false;
            }
        }
        return result;
    }

    /** Removes underscores and spaces from a name and lower-cases it. */
    private static String stripSeparators(String name) {
        return name.replace("_", "").replace(" ", "").toLowerCase();
    }
}