/*
 * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 * Copyright [2016-2017] EMBL-European Bioinformatics Institute
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Copyright (C) 2003 EBI, GRL
 *
 * This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.ensembl.healthcheck.testcase.generic;

import java.sql.Connection;

import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.Priority;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;

/**
 * Check that no HGNC xrefs have dbprimary_acc=display_label; this is usually
 * caused by withdrawn symbols which we don't want to include.
 *
 * <p>
 * The test tolerates a small proportion of such xrefs: it only fails when the
 * fraction of gene-linked HGNC xrefs with identical accession and label
 * exceeds {@link #threshold}.
 */
public class HGNCNumeric extends SingleDatabaseTestCase {

	/**
	 * Fraction of numeric identifiers below which no warning will be issued.
	 * Expressed as a fraction (0.01 is 1%), not a percentage.
	 */
	double threshold = 0.01;

	/**
	 * Create a new HGNCNumeric testcase, setting its description, priority,
	 * suggested fix, effect and responsible team.
	 */
	public HGNCNumeric() {

		setDescription("Check that no HGNC xrefs have dbprimary_acc=display_label");
		setPriority(Priority.AMBER);
		setFix("Remove HGNC xrefs and object_xrefs where dbprimary_acc=display_label. Set display_xref_ids of genes that were pointing to these to null.");
		setEffect("Causes genes to be displayed with numeric HGNC symbols, and some dbprimary_acc=display_label for HGNC when they're not supposed to be, which confuses Mart.");
		setTeamResponsible(Team.CORE);

	}

	/**
	 * Restrict the database types this test applies to by removing the cDNA,
	 * Vega, otherfeatures and RNA-seq types.
	 */
	public void types() {

		removeAppliesToType(DatabaseType.CDNA);
		removeAppliesToType(DatabaseType.VEGA);
		removeAppliesToType(DatabaseType.OTHERFEATURES);
		removeAppliesToType(DatabaseType.RNASEQ);

	}

	/**
	 * Run the test.
	 *
	 * <p>
	 * Two counts are taken over HGNC xrefs attached to genes: all of them, and
	 * those whose dbprimary_acc equals their display_label. The test fails when
	 * the second count, as a fraction of the first, is greater than
	 * {@link #threshold}. When there are no gene-linked HGNC xrefs at all the
	 * test passes without writing a report entry.
	 *
	 * @param dbre
	 *          The database to use.
	 * @return true if the test passed.
	 */
	public boolean run(DatabaseRegistryEntry dbre) {

		Connection con = dbre.getConnection();

		// Every HGNC xref that is attached to a gene through object_xref.
		String allSQL = "SELECT COUNT(*) FROM external_db e, xref x, object_xref ox, gene g"
				+ " WHERE e.external_db_id=x.external_db_id AND x.xref_id=ox.xref_id"
				+ " AND ox.ensembl_object_type='Gene' AND ox.ensembl_id=g.gene_id"
				+ " AND e.db_name LIKE 'HGNC%'";

		// The same set, narrowed to xrefs whose accession doubles as the label.
		String numericSQL = allSQL + " AND x.dbprimary_acc=x.display_label";

		int rowsAll = DBUtils.getRowCount(con, allSQL);

		if (rowsAll == 0) {
			return true; // avoid division by 0 later
		}

		int rowsNumeric = DBUtils.getRowCount(con, numericSQL);

		double fraction = (double) rowsNumeric / (double) rowsAll;

		if (fraction > threshold) {

			ReportManager.problem(this, con, rowsNumeric + " (" + (fraction * 100)
					+ "%) HGNC xrefs with dbprimary_acc=display_label; this will cause genes to have numeric display names, or break hyperlinks");
			return false;

		}

		// Passing means at most `threshold` of the xrefs are numeric, so the
		// proportion that differ is at least the complement of the threshold.
		ReportManager.correct(this, con, "All HGNC xrefs (or at least " + (100.0 - threshold * 100)
				+ "%) have different dbprimary_acc and display_label");

		return true;

	} // run

	// ----------------------------------------------------------------------

} // HGNCNumeric