/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Copyright (C) 2004 EBI, GRL * * This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
*
 * You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.ensembl.healthcheck.testcase.generic;

import java.sql.Connection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.Arrays;

// import org.apache.commons.collections.CollectionUtils;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Species;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;
import org.ensembl.healthcheck.util.Utils;
import org.ensembl.healthcheck.util.SqlTemplate;
import org.ensembl.healthcheck.util.CollectionUtils;

/**
 * Check that all tables have data.
 *
 * <p>
 * The set of tables that must be non-empty depends on the species and the
 * database type of the database under test; see
 * {@link #getTablesToCheck(DatabaseRegistryEntry)}.
 */
public class EmptyTables extends SingleDatabaseTestCase {

  // tables grouped by category
  private Set<String> idMappingTables, densityTables, markerTables, miscTables, karyotypeTables, ditagDataTables;

  // for each category above, the species whose tables of that category are
  // checked for data
  private Set<Species> idMappingSpecies, densitySpecies, markerSpecies, miscSpecies, karyotypeSpecies, ditagDataSpecies;

  // a map of database types to the set of tables which are
  // allowed to be empty for that particular db type
  Map<DatabaseType, Set<String> > allowedEmptyTablesMap;

  /**
   * Creates a new instance of EmptyTables.
   */
  public EmptyTables() {

    setDescription("Checks that all tables have data");
    setTeamResponsible(Team.GENEBUILD);
    setSecondTeamResponsible(Team.RELEASE_COORDINATOR);

    // initialize the sets of tables grouped by category
    // and the corresponding species
    initTablesAndSpeciesLists();

  }

  // ---------------------------------------------------------------------

  /**
   * Define what tables are to be checked.
   *
   * <p>
   * Starts from the tables returned by
   * {@link #getTableNames(DatabaseRegistryEntry, DatabaseType, Species)} and,
   * for database types present in {@code allowedEmptyTablesMap}, removes the
   * tables allowed to be empty for that type as well as every table category
   * that is not expected to be populated for the species.
   *
   * @param dbre
   *          The database to check.
   * @return the names of the tables that must contain at least one row.
   */
  private Set<String> getTablesToCheck(final DatabaseRegistryEntry dbre) {

    Species species = dbre.getSpecies();
    DatabaseType type = dbre.getType();

    Set<String> tables = this.getTableNames(dbre, type, species);

    // ----------------------------------------------------
    // the list of tables to check for ancestral sequences is
    // already set by method getTableNames
    if (species == Species.ANCESTRAL_SEQUENCES) {
      return tables;
    }

    // do not check for emptiness for some tables for certain
    // database types (i.e. CORE and VEGA), and certain species
    if (allowedEmptyTablesMap.containsKey(type)) {

      tables.removeAll(allowedEmptyTablesMap.get(type));

      if (!idMappingSpecies.contains(species)) {
        // don't check id mapping tables
        tables.removeAll(idMappingTables);
      }

      // WARNING: these tables are populated only for species with a karyotype.
      // karyotypeExists() queries the database, so it is evaluated lazily:
      // only for this branch and only when the species is not listed.
      if (!densitySpecies.contains(species) && !karyotypeExists(dbre)) {
        // don't check density tables
        tables.removeAll(densityTables);
      }

      if (!markerSpecies.contains(species)) {
        // don't check marker tables
        tables.removeAll(markerTables);
      }

      if (!miscSpecies.contains(species)) {
        // don't check misc tables
        tables.removeAll(miscTables);
      }

      if (!karyotypeSpecies.contains(species)) {
        // don't check karyotype banding tables
        tables.removeAll(karyotypeTables);
      }

      if (!ditagDataSpecies.contains(species)) {
        // don't check ditag data tables
        tables.removeAll(ditagDataTables);
      }

    }

    // ad-hoc adjustment for zebrafish in VEGA
    if (type == DatabaseType.VEGA && species == Species.DANIO_RERIO) {
      tables.remove("ontology_xref");
    }

    return tables;

  }

  // ---------------------------------------------------------------------

  /**
   * Check that every table has more than 0 rows.
   *
   * <p>
   * A problem is reported for each table without rows; a single "correct"
   * report is issued when no table failed.
   *
   * @param dbre
   *          The database to check.
   * @return true if the test passed.
   */
  public boolean run(DatabaseRegistryEntry dbre) {

    boolean result = true;

    Set<String> tables = getTablesToCheck(dbre);
    Connection con = dbre.getConnection();

    for (String table : tables) {
      if (!tableHasRows(con, table)) {
        ReportManager.problem(this, con, table + " has zero rows");
        result = false;
      }
    }

    if (result) {
      ReportManager.correct(this, con, "All required tables have data");
    }

    return result;

  } // run

  // -----------------------------------------------------------------

  /**
   * Tell whether the database holds any karyotype rank attribute.
   *
   * <p>
   * Counts the seq_region_attrib rows whose attrib_type has the code
   * 'karyotype_rank'.
   *
   * @param dbre
   *          The database to query.
   * @return false if that count is 0, true otherwise.
   */
  protected boolean karyotypeExists(DatabaseRegistryEntry dbre) {

    SqlTemplate t = DBUtils.getSqlTemplate(dbre);
    String sqlKaryotype = "SELECT count(*) FROM seq_region_attrib sa, attrib_type at WHERE at.attrib_type_id = sa.attrib_type_id AND code = 'karyotype_rank'";
    int karyotype = t.queryForDefaultObject(sqlKaryotype, Integer.class);

    return karyotype != 0;

  }

  // -----------------------------------------------------------------

  /**
   * Build the initial set of table names for a database.
   *
   * <p>
   * Ancestral sequence, OTHERFEATURES, CDNA and RNASEQ databases get a fixed
   * list. For everything else the list is read from the database itself, minus
   * views and minus tables whose name starts with "backup_".
   *
   * @param dbre
   *          The database to inspect.
   * @param type
   *          The type of that database.
   * @param species
   *          The species of that database.
   * @return a set of table names; callers remove entries from it.
   */
  private Set<String> getTableNames(final DatabaseRegistryEntry dbre, final DatabaseType type, final Species species) {

    if (species == Species.ANCESTRAL_SEQUENCES) {
      // Only a few tables need to be filled in ancestral databases
      return CollectionUtils.createLinkedHashSet("meta", "coord_system", "dna", "seq_region");
    }

    if (type == DatabaseType.OTHERFEATURES || type == DatabaseType.CDNA) {
      // Only a few tables need to be filled in EST
      return CollectionUtils.createLinkedHashSet("analysis", "analysis_description", "assembly", "attrib_type", "coord_system",
          "dna_align_feature", "external_db", "meta_coord", "meta", "misc_set", "seq_region", "seq_region_attrib",
          "unmapped_reason");
    }

    if (type == DatabaseType.RNASEQ) {
      // the same for RNASEQ
      return CollectionUtils.createLinkedHashSet("analysis", "analysis_description", "assembly", "attrib_type", "coord_system",
          "data_file", "dna_align_feature", "external_db", "meta_coord", "meta", "misc_set", "seq_region",
          "seq_region_attrib", "unmapped_reason");
    }

    Connection con = dbre.getConnection();

    // get the full list of tables
    Set<String> tables = CollectionUtils.createLinkedHashSet(DBUtils.getTableNames(con));

    // remove views since we don't care if they're empty
    Set<String> views = CollectionUtils.createLinkedHashSet(DBUtils.getViews(con).toArray(new String[]{}));
    tables.removeAll(views);

    // remove backup tables (starting with backup_)
    // they are allowed to be empty; collected first so the set is not
    // modified while it is being iterated
    Set<String> backUpTables = new HashSet<String>();
    for (String table : tables) {
      if (table.startsWith("backup_")) {
        backUpTables.add(table);
      }
    }
    tables.removeAll(backUpTables);

    return tables;

  }

  /**
   * Initialise the table categories, the species attached to each category
   * and the per-database-type map of tables allowed to be empty.
   */
  private void initTablesAndSpeciesLists() {
    /*
     * NOTE: The tables of a category are only checked for the species listed in
     * the corresponding species set. An empty species set therefore means the
     * tables of that category are not checked for any species. Density tables
     * are the exception: they are still checked when the database has a
     * karyotype.
     */

    // ID mapping tables, species
    idMappingTables = CollectionUtils.createLinkedHashSet("gene_archive", "peptide_archive", "mapping_session",
        "stable_id_event");
    // ID mapping related tables are checked in a separate test case
    idMappingSpecies = new HashSet<Species>();

    // density tables and species
    densityTables = CollectionUtils.createLinkedHashSet("density_feature", "density_type");
    // all species should have density mapping data (provided they have karyotype)
    densitySpecies = new HashSet<Species>();

    // marker tables and species
    markerTables = CollectionUtils.createLinkedHashSet("map", "marker", "marker_map_location", "marker_synonym",
        "marker_feature");
    markerSpecies = CollectionUtils.createLinkedHashSet(Species.HOMO_SAPIENS, Species.MUS_MUSCULUS,
        Species.RATTUS_NORVEGICUS, Species.DANIO_RERIO, Species.BOS_TAURUS, Species.CANIS_FAMILIARIS,
        Species.GALLUS_GALLUS, Species.MACACA_MULATTA, Species.SUS_SCROFA);

    // misc tables and species
    miscTables = CollectionUtils.createLinkedHashSet("misc_feature", "misc_feature_misc_set", "misc_set", "misc_attrib");
    miscSpecies = CollectionUtils.createLinkedHashSet(Species.HOMO_SAPIENS, Species.DANIO_RERIO);

    // karyotype banding tables and species
    // only certain species have a karyotype banding
    karyotypeTables = CollectionUtils.createLinkedHashSet("karyotype");
    karyotypeSpecies = CollectionUtils.createLinkedHashSet(Species.DROSOPHILA_MELANOGASTER, Species.HOMO_SAPIENS,
        Species.MUS_MUSCULUS, Species.RATTUS_NORVEGICUS);

    // ditag data tables and species
    // only human, mouse and medaka currently have ditag data
    ditagDataTables = CollectionUtils.createLinkedHashSet("ditag", "ditag_feature");
    ditagDataSpecies = CollectionUtils.createLinkedHashSet(Species.HOMO_SAPIENS, Species.MUS_MUSCULUS,
        Species.ORYZIAS_LATIPES);

    // init map of database types to a set of tables which are
    // allowed to be empty for that particular db type
    allowedEmptyTablesMap = new HashMap<DatabaseType, Set<String> >();

    allowedEmptyTablesMap.put(DatabaseType.CORE,
        CollectionUtils.createLinkedHashSet("alt_allele", "alt_allele_attrib", "alt_allele_group", "assembly_exception",
            "data_file", "dnac", "seq_region_mapping", "unconventional_transcript_association", "operon",
            "operon_transcript", "operon_transcript_gene", "intron_supporting_evidence",
            "transcript_intron_supporting_evidence", "associated_xref", "associated_group", "qtl", "qtl_feature",
            "qtl_synonym"));

    allowedEmptyTablesMap.put(DatabaseType.VEGA,
        CollectionUtils.createLinkedHashSet("alt_allele", "alt_allele_attrib", "alt_allele_group", "assembly_exception",
            "data_file", "dnac", "seq_region_mapping", "unconventional_transcript_association", "operon",
            "operon_transcript", "operon_transcript_gene", "intron_supporting_evidence",
            "transcript_intron_supporting_evidence", "associated_xref", "associated_group", "qtl", "qtl_feature",
            "qtl_synonym", "affy_array", "affy_feature", "affy_probe", "ditag", "ditag_feature", "dna",
            "external_synonym", "identity_xref", "map", "mapping_session", "marker", "marker_feature",
            "marker_map_location", "marker_synonym", "misc_attrib", "misc_feature", "misc_feature_misc_set", "misc_set",
            "prediction_exon", "prediction_transcript", "repeat_consensus", "repeat_feature", "simple_feature",
            "supporting_feature", "transcript_attrib", "unconventional_transcript_association", "dependent_xref",
            "seq_region_synonym", "density_feature", "unmapped_object", "mapping_set", "density_type",
            "genome_statistics"));
  }

} // EmptyTables