/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck.testcase.eg_compara; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.util.Arrays; import java.util.List; import java.util.Map; import org.apache.commons.lang.StringUtils; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.DatabaseType; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.AbstractTemplatedTestCase; import org.ensembl.healthcheck.util.AbstractStringMapRowMapper; import org.ensembl.healthcheck.util.CollectionUtils; import org.ensembl.healthcheck.util.DBUtils; import org.ensembl.healthcheck.util.DefaultMapRowMapper; import org.ensembl.healthcheck.util.MapRowMapper; public class EGForeignKeyMethodLinkSpeciesSetId extends AbstractTemplatedTestCase { public EGForeignKeyMethodLinkSpeciesSetId() { setTeamResponsible(Team.ENSEMBL_GENOMES); appliesToType(DatabaseType.COMPARA); setDescription("Checks the consistency of MLSS foreign keys"); } @Override protected boolean runTest(DatabaseRegistryEntry dbre) { boolean result = true; result &= assertNoEmptyNames(dbre); result &= assertNoSource(dbre); result &= assertMethodLinkSpeciesSetCounts(dbre); if (DBUtils.getShortDatabaseName(dbre.getConnection()).contains(System.getProperty("compara_master.database"))) { return result; } result &= assertMlssIdForeignKeysAndRanges(dbre); result &= assertMlssGeneTreeRootOrphans(dbre); result &= assertGeneTreeRootOrphans(dbre); result &= assertMlssGenomicAlignOrphans(dbre); result &= assertGenomicAlignOrphans(dbre); return result; } /** * Check for MLSS which lack a name */ protected boolean assertNoEmptyNames(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); int numOfUnsetNames = DBUtils .getRowCount( con, "SELECT count(*) FROM method_link_species_set WHERE name = 'NULL' OR name IS NULL"); if (numOfUnsetNames > 0) { ReportManager.problem(this, con, "FAILED method_link_species_set table contains " + numOfUnsetNames + " with no name"); result = false; } return result; } /** * Check for MLSS which lack a source */ protected boolean assertNoSource(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); int numOfUnsetSources = DBUtils .getRowCount( con, "SELECT count(*) FROM method_link_species_set WHERE source = 'NULL' OR source IS NULL"); if (numOfUnsetSources > 0) { ReportManager.problem(this, con, "FAILED method_link_species_set table contains " + numOfUnsetSources + " with no source"); result = false; } return result; } /** * Loops through all known method link types from * {@link #getMethodLinkTypeToTable()} and uses * {@link #getMethodLinkTypeRange()} to assert that all link method link * species set identifiers have the correct method_link_id range */ protected boolean assertMlssIdForeignKeysAndRanges( DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); Map<String, String> typeToTable = getMethodLinkTypeToTable(); Map<String, List<Integer>> typeToRanges = getMethodLinkTypeRange(); for (Map.Entry<String, String> entry : typeToTable.entrySet()) { String type = entry.getKey(); String table = entry.getValue(); List<Integer> ranges = typeToRanges.get(type); Integer lower = ranges.get(0); Integer upper = ranges.get(1); result &= checkForOrphansWithConstraint(con, "method_link_species_set", "method_link_species_set_id", table, "method_link_species_set_id", "method_link_id >= " + lower + " and method_link_id < " + upper); result &= checkForOrphans(con, table, "method_link_species_set_id", "method_link_species_set", "method_link_species_set_id"); } return result; } /** * Check for the number of MLSS unlinked to a protein tree and those protein * tree members unlinked to a MLSS */ protected boolean assertMlssGeneTreeRootOrphans( DatabaseRegistryEntry dbre) { return checkForOrphansWithConstraint( dbre.getConnection(), "method_link_species_set", "method_link_species_set_id", "gene_tree_root", "method_link_species_set_id", "method_link_id IN (SELECT method_link_id FROM method_link WHERE class LIKE 'ProteinTree.%')"); } protected boolean assertGeneTreeRootOrphans(DatabaseRegistryEntry dbre) { return checkForOrphans(dbre.getConnection(), "gene_tree_root", "method_link_species_set_id", "method_link_species_set", "method_link_species_set_id"); } protected boolean assertMlssGenomicAlignOrphans( DatabaseRegistryEntry dbre) { return checkForOrphansWithConstraint( dbre.getConnection(), "method_link_species_set", "method_link_species_set_id", "genomic_align_block", "method_link_species_set_id", "method_link_id BETWEEN 1 AND 99 AND method_link_id != 11"); } protected boolean assertGenomicAlignOrphans(DatabaseRegistryEntry dbre) { return checkForOrphans(dbre.getConnection(), "genomic_align_block", "method_link_species_set_id", "method_link_species_set", "method_link_species_set_id"); } // Hashed out because we do not do this kind of analysis yet // protected boolean assertNCTreeMethodLinkSpeciesSet( // DatabaseRegistryEntry dbre) { // return checkForOrphansWithConstraint( // dbre.getConnection(), // "method_link_species_set", // "method_link_species_set_id", // "nc_tree_member", // "method_link_species_set_id", // "method_link_id IN (SELECT method_link_id FROM method_link WHERE class LIKE 'NCTree.%')"); // } /** * loops through all method link species sets where an expected count is * known from {@link #getMethodLinkTypeToExpectedCounts()} and asserts that * the number of species in the method link species set is equal to one of * those values */ protected boolean assertMethodLinkSpeciesSetCounts( DatabaseRegistryEntry dbre) { boolean result = true; Map<String, List<Long>> methodLinkToMlssId = getMethodLinkTypeToMlssId(dbre); Map<Long, Integer> mlssIdToCount = getMlssIdCount(dbre); Map<String, List<Integer>> methodLinkTypeExpectedCounts = getMethodLinkTypeToExpectedCounts(); for (Map.Entry<String, List<Long>> methodLink : methodLinkToMlssId .entrySet()) { String methodLinkType = methodLink.getKey(); if (!methodLinkTypeExpectedCounts.containsKey(methodLinkType)) continue; for (Long methodLinkSpeciesSetId : methodLink.getValue()) { Integer count = mlssIdToCount.get(methodLinkSpeciesSetId); if (count != null) { boolean countOkay = false; List<Integer> expectedCounts = methodLinkTypeExpectedCounts .get(methodLinkType); for (int expected : expectedCounts) { if (count == expected) { countOkay = true; break; } } if (!countOkay) { result = false; String expecteds = StringUtils .join(expectedCounts, ','); ReportManager.problem(this, dbre.getConnection(), "MLSS ID " + methodLinkSpeciesSetId + " of type " + methodLinkType + " count was " + count + ". We expected [" + expecteds + "]"); } } else { ReportManager.problem(this, dbre.getConnection(), "No count found for MLSS ID " + methodLinkSpeciesSetId + " of type " + methodLinkType); } } } return result; } protected Map<Long, Integer> getMlssIdCount(DatabaseRegistryEntry dbre) { return getTemplate(dbre) .queryForMap( "select mlss.method_link_species_set_id, count(*) from method_link_species_set mlss join species_set ss using (species_set_id) group by mlss.method_link_species_set_id", new DefaultMapRowMapper<Long, Integer>(Long.class, Integer.class)); } protected Map<String, List<Long>> getMethodLinkTypeToMlssId( DatabaseRegistryEntry dbre) { MapRowMapper<String, List<Long>> mapper = new AbstractStringMapRowMapper<List<Long>>() { @Override public List<Long> mapRow(ResultSet resultSet, int position) throws SQLException { List<Long> longs = CollectionUtils.createArrayList(); existingObject(longs, resultSet, position); return longs; } @Override public void existingObject(List<Long> currentValue, ResultSet resultSet, int position) throws SQLException { currentValue.add(resultSet.getLong(2)); } }; return getTemplate(dbre) .queryForMap( "select ml.type, mlss.method_link_species_set_id from method_link ml join method_link_species_set mlss using (method_link_id)", mapper); } protected Map<String, List<Integer>> getMethodLinkTypeToExpectedCounts() { Map<String, List<Integer>> output = CollectionUtils.createHashMap(); List<Integer> pairwise = Arrays.asList(2); output.put("ENSEMBL_ORTHOLOGUES", pairwise); output.put("ENSEMBL_PARALOGUES", Arrays.asList(1, 2)); output.put("BLASTZ_NET", pairwise); output.put("LASTZ_NET", pairwise); output.put("TRANSLATED_BLAT_NET", pairwise); return output; } protected Map<String, List<Integer>> getMethodLinkTypeRange() { Map<String, List<Integer>> output = CollectionUtils.createHashMap(); output.put("ENSEMBL_ORTHOLOGUES", Arrays.asList(201, 202)); output.put("ENSEMBL_PARALOGUES", Arrays.asList(202, 300)); output.put("SYNTENY", Arrays.asList(101, 200)); output.put("FAMILY", Arrays.asList(301, 400)); return output; } protected Map<String, String> getMethodLinkTypeToTable() { Map<String, String> output = CollectionUtils.createHashMap(); output.put("ENSEMBL_ORTHOLOGUES", "homology"); output.put("ENSEMBL_PARALOGUES", "homology"); output.put("SYNTENY", "synteny_region"); output.put("FAMILY", "family"); return output; } }