/*
 * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 * Copyright [2016-2017] EMBL-European Bioinformatics Institute
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * File: AliasAndNaming.java
 * Created by: dstaines
 * Created on: May 26, 2009
 * CVS: $$
 */
package org.ensembl.healthcheck.testcase.eg_core;

import java.util.List;
import java.util.regex.Pattern;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.util.SqlTemplate;
import org.ensembl.healthcheck.util.TestCaseUtils;

/**
 * Test for whether EnsemblGenomes species are correctly named.
 *
 * <p>
 * For every species ID in the database this checks that:
 * <ul>
 * <li>exactly one {@code species.scientific_name} meta value exists;</li>
 * <li>the binomial name supplied by {@link TestCaseUtils} is present among
 * the name/alias meta values;</li>
 * <li>{@code species.production_name} is set, consists only of
 * {@code [0-9a-z_]} and contains no run of two or more underscores.</li>
 * </ul>
 *
 * @author dstaines
 *
 */
public class AliasAndNaming extends AbstractEgCoreTestCase {

    /** Fetches all meta values for one key and one species. */
    private final static String META_QUERY = "select meta_value from meta where meta_key=? and species_id=?";

    /** Characters permitted in a production name. */
    private static final Pattern VALID_PRODUCTION_NAME = Pattern
            .compile("^[0-9a-z_]+$");

    /** A run of two or more underscores, which is not permitted anywhere in a production name. */
    private static final Pattern INVALID_PRODUCTION_NAME2 = Pattern
            .compile("__+");

    private static final String PRODUCTION_NAME = "species.production_name";
    private static final String DB_NAME = "species.db_name";
    private static final String SCI_NAME = "species.scientific_name";
    private static final String ALIAS = "species.alias";

    /**
     * Runs the naming checks for every species ID in the database.
     *
     * @param dbre
     *            database to test
     * @return true only if every species passes all checks
     */
    protected boolean runTest(DatabaseRegistryEntry dbre) {
        SqlTemplate template = getTemplate(dbre);
        boolean passes = true;
        for (int speciesId : dbre.getSpeciesIds()) {
            // non-short-circuit &= so that every species is checked and reported
            passes &= checkNames(dbre, template, speciesId);
        }
        return passes;
    }

    /**
     * Retrieves the single meta value stored under a key for a species.
     * Reports a problem if the key does not have exactly one value.
     *
     * @param dbre
     *            database being tested (used for reporting)
     * @param template
     *            template used to run the query
     * @param speciesId
     *            species to look up
     * @param key
     *            meta key to look up
     * @return the value, or null if there was not exactly one value
     */
    private String getName(DatabaseRegistryEntry dbre, SqlTemplate template,
            int speciesId, String key) {
        List<String> names = template.queryForDefaultObjectList(META_QUERY,
                String.class, key, speciesId);
        if (names.size() != 1) {
            ReportManager.problem(this, dbre.getConnection(),
                    "Expect exactly one name for key " + key
                            + " for species ID " + speciesId);
            return null;
        }
        return names.get(0);
    }

    /**
     * Checks the alias, scientific name and production name meta entries of
     * one species.
     *
     * @param dbre
     *            database being tested
     * @param template
     *            template used to run queries
     * @param speciesId
     *            species to check
     * @return true if no problem was found for this species
     */
    private boolean checkNames(DatabaseRegistryEntry dbre,
            SqlTemplate template, int speciesId) {
        boolean passes = true;
        List<String> aliases = template.queryForDefaultObjectList(META_QUERY,
                String.class, ALIAS, speciesId);

        String sciName = getName(dbre, template, speciesId, SCI_NAME);
        if (sciName == null) {
            // getName has already reported the problem; make sure the test
            // result reflects it
            passes = false;
        }

        String binomialName = null;
        if (dbre.isMultiSpecies()) {
            binomialName = TestCaseUtils.getBinomialNameMulti(template,
                    speciesId);
            String dbName = getName(dbre, template, speciesId, DB_NAME);
            if (dbName == null) {
                // problem already reported by getName
                passes = false;
            }
            // the binomial name may be supplied by the scientific name, the
            // db name or any alias
            if (!binomialName.equals(sciName)
                    && !binomialName.equals(dbName)
                    && !aliases.contains(binomialName)) {
                passes = false;
                ReportManager.problem(this, dbre.getConnection(),
                        "There should be one " + ALIAS + " or " + DB_NAME
                                + " meta value that matches name '"
                                + binomialName + "' for species "
                                + speciesId);
            }
        } else {
            binomialName = TestCaseUtils.getBinomialName(template, speciesId);
            if (!aliases.contains(binomialName)) {
                passes = false;
                ReportManager.problem(this, dbre.getConnection(), "No "
                        + ALIAS + " meta value found that matches name '"
                        + binomialName + "' for species " + speciesId);
                // second report carries a suggested SQL fix; it is only
                // reported, never executed here
                ReportManager.problem(this, dbre.getConnection(),
                        "INSERT INTO " + dbre.getName()
                                + ".meta(species_id,meta_key,meta_value) VALUES("
                                + speciesId + ",'species.alias','"
                                + binomialName + "');");
            }
        }

        String productionName = org.ensembl.healthcheck.util.CollectionUtils
                .getFirstElement(template.queryForDefaultObjectList(
                        META_QUERY, String.class, PRODUCTION_NAME, speciesId),
                        null);
        if (StringUtils.isEmpty(productionName)) {
            passes = false;
            ReportManager.problem(this, dbre.getConnection(),
                    "Meta value for " + PRODUCTION_NAME
                            + " is not set for species " + speciesId);
        } else if (!isWellFormedProductionName(productionName)) {
            passes = false;
            ReportManager.problem(this, dbre.getConnection(), "Meta value "
                    + productionName + " for key " + PRODUCTION_NAME
                    + " does not match the required value for species "
                    + speciesId);
        }
        return passes;
    }

    /**
     * Tests whether a production name uses only permitted characters and
     * contains no run of two or more underscores.
     *
     * @param name
     *            non-null production name
     * @return true if the name is well formed
     */
    private static boolean isWellFormedProductionName(String name) {
        // find() rather than matches(): a double underscore is invalid
        // anywhere in the name, not only when it is the whole name
        return VALID_PRODUCTION_NAME.matcher(name).matches()
                && !INVALID_PRODUCTION_NAME2.matcher(name).find();
    }

    /*
     * (non-Javadoc)
     *
     * @see org.ensembl.healthcheck.testcase.AbstractTemplatedTestCase#getEgDescription()
     */
    @Override
    protected String getEgDescription() {
        return "Tests whether species are correctly named for Ensembl Genomes";
    }
}