/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.ensembl.healthcheck.testcase.generic; import java.sql.Connection; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.DatabaseType; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase; import org.ensembl.healthcheck.util.DBUtils; /** * Check for HGNCs that have been assigned as display labels more than one gene. */ public class HGNCMultipleGenes extends SingleDatabaseTestCase { /** * Creates a new instance of HGNCMultipleGenes. */ public HGNCMultipleGenes() { setDescription("Check for HGNCs that have been assigned as display labels more than one gene."); setTeamResponsible(Team.CORE); } /** * This test only applies to core databases. */ public void types() { removeAppliesToType(DatabaseType.OTHERFEATURES); removeAppliesToType(DatabaseType.ESTGENE); removeAppliesToType(DatabaseType.VEGA); removeAppliesToType(DatabaseType.CDNA); removeAppliesToType(DatabaseType.RNASEQ); } /** * Run the test. * * @param dbre * The database to use. * @return Result. */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); // this has to be done the slow way, don't think there's a way to do // this all at once String sql = "SELECT DISTINCT(x.display_label), COUNT(*) AS count FROM gene g, xref x, external_db e WHERE e.external_db_id=x.external_db_id AND e.db_name LIKE 'HGNC%' AND x.xref_id=g.display_xref_id and x.display_label not like '%1 to many)' "; sql += "and g.seq_region_id NOT in (select seq_region_id FROM seq_region_attrib sa, attrib_type at WHERE at.attrib_type_id = sa.attrib_type_id AND code = 'non_ref') "; if (dbre.getType() == DatabaseType.SANGER_VEGA) {// for sangervega do // not consider // duplicates for // the haplotypes sql += "and g.seq_region_id NOT in(select seq_region_id from seq_region_attrib sa join attrib_type at on sa.attrib_type_id=at.attrib_type_id where code ='vega_ref_chrom') and (g.source='havana' or g.source='WU') "; } sql += " GROUP BY x.display_label"; if (dbre.getType() == DatabaseType.SANGER_VEGA) {// for sangervega only // count the ones // for which the // source is the // same sql += ", g.source "; } sql += " HAVING COUNT > 1"; int rows = DBUtils.getRowCount(con, sql); if (rows > 0) { ReportManager.problem(this, con, rows + " HGNC symbols have been assigned to more than one gene"); result = false; } else { ReportManager.correct(this, con, "All HGNC symbols only assigned to one gene"); } return result; } // ---------------------------------------------------------------------- }