/*
 * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 * Copyright [2016-2017] EMBL-European Bioinformatics Institute
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ensembl.healthcheck.testcase.generic;

import java.sql.Connection;

import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.Priority;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;

/**
 * Check for duplicated rows in various *_attrib tables.
 *
 * <p>
 * For every table the total row count is compared with the number of distinct
 * (id column, attrib_type_id, value) combinations. A problem is reported when
 * the fraction of surplus rows exceeds {@link #THRESHOLD}.
 */
public class DuplicateAttributes extends SingleDatabaseTestCase {

    /**
     * Fraction of non-unique rows must be greater than this for a problem to
     * be reported. With the value 0.0 a single duplicated row is enough.
     */
    static final double THRESHOLD = 0.0;

    /**
     * Prefixes of the tables to check; "_attrib" is appended to build the
     * table name.
     */
    static final String[] attribs = { "gene", "transcript", "translation", "seq_region", "misc" };

    /**
     * Creates a new instance of DuplicateAttributes
     */
    public DuplicateAttributes() {

        setDescription("Check for duplicated rows in various *_attrib tables.");
        setPriority(Priority.AMBER);
        setEffect("Many duplicates can cause serious performance problems.");
        setFix("Remove duplicated rows if appropriate.");
        setTeamResponsible(Team.GENEBUILD);

    }

    /**
     * Run the test against a single database.
     *
     * @param dbre
     *          The registry entry of the database to check; its connection is
     *          used for all queries.
     * @return true if no checked table exceeded the duplicate threshold, false
     *         if at least one did.
     */
    @Override
    public boolean run(DatabaseRegistryEntry dbre) {

        boolean result = true;

        Connection con = dbre.getConnection();

        for (String attrib : attribs) {
            // Check every table even after a failure so that each one gets
            // its own report line.
            if (!checkTable(con, attrib)) {
                result = false;
            }
        }

        return result;

    } // run

    /**
     * Check one *_attrib table for duplicated rows and report the outcome.
     *
     * @param con
     *          Connection to the database being checked.
     * @param attrib
     *          Table prefix, e.g. "gene" for gene_attrib.
     * @return false if a problem was reported for the table, true otherwise
     *         (including when the table is empty, in which case nothing is
     *         reported).
     */
    private boolean checkTable(Connection con, String attrib) {

        String table = attrib + "_attrib";
        // misc_attrib is the one table whose id column is not <prefix>_id
        String column = attrib.equals("misc") ? "misc_feature_id" : attrib + "_id";

        logger.finest("Checking " + table);

        int totalRows = DBUtils.getRowCount(con, "SELECT COUNT(*) FROM " + table);
        if (totalRows == 0) {
            // avoid division by zero; an empty table cannot contain duplicates
            return true;
        }

        // NOTE(review): on MySQL, COUNT(DISTINCT a, b, c) leaves out rows in
        // which any listed column is NULL - this presumably relies on the
        // three columns being NOT NULL; confirm against the schema.
        int uniqueRows = DBUtils.getRowCount(con,
                "SELECT COUNT(DISTINCT " + column + ", attrib_type_id, value) FROM " + table);

        int duplicates = totalRows - uniqueRows;

        if ((double) duplicates / (double) totalRows > THRESHOLD) {
            ReportManager.problem(this, con,
                    table + " has " + totalRows + " rows in total but only " + uniqueRows + " are unique");
            return false;
        }

        ReportManager.correct(this, con, "No duplicated rows in " + table);
        return true;

    } // checkTable

} // DuplicateAttributes