/* * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute * Copyright [2016-2017] EMBL-European Bioinformatics Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Created on 09-Mar-2004 * * To change the template for this generated file go to * Window - Preferences - Java - Code Generation - Code and Comments */ package org.ensembl.healthcheck.testcase.generic; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.sql.PreparedStatement; import java.util.Arrays; import java.util.List; import java.util.Set; import org.ensembl.healthcheck.DatabaseRegistryEntry; import org.ensembl.healthcheck.DatabaseType; import org.ensembl.healthcheck.ReportManager; import org.ensembl.healthcheck.Species; import org.ensembl.healthcheck.Team; import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase; import org.ensembl.healthcheck.util.DBUtils; import org.ensembl.healthcheck.util.SqlTemplate; import org.apache.commons.collections.ListUtils; import org.ensembl.healthcheck.util.CollectionUtils; /** * Check if any chromosomes that have different lengths in karyotype & seq_region tables. */ public class Karyotype extends SingleDatabaseTestCase { /** * Create a new Karyotype test case. */ public Karyotype() { setDescription("Check that karyotype and seq_region tables agree"); setTeamResponsible(Team.GENEBUILD); } /** * This only applies to core and Vega databases. */ public void types() { removeAppliesToType(DatabaseType.ESTGENE); removeAppliesToType(DatabaseType.VEGA); removeAppliesToType(DatabaseType.OTHERFEATURES); removeAppliesToType(DatabaseType.CDNA); removeAppliesToType(DatabaseType.RNASEQ); } /** * Run the test. * * @param dbre * The database to use. * @return true if the test passed. * */ public boolean run(DatabaseRegistryEntry dbre) { boolean result = true; Connection con = dbre.getConnection(); Species species = dbre.getSpecies(); Set<Species> karyotypeSpecies = CollectionUtils.createLinkedHashSet(Species.DROSOPHILA_MELANOGASTER, Species.HOMO_SAPIENS, Species.MUS_MUSCULUS, Species.RATTUS_NORVEGICUS); result &= karyotypeExists(dbre); // don't check for empty karyotype table - this is done in EmptyTables // meta_coord check also done in MetaCoord // The seq_region.length and karyotype.length should always be the // same. // The SQL returns failures if(karyotypeSpecies.contains(species) == true) { result &= checkKaryotype(dbre); } return result; } protected boolean checkKaryotype(DatabaseRegistryEntry dbre) { Connection con = dbre.getConnection(); boolean result = true; String[] seqRegionNames = DBUtils.getColumnValues(con, "SELECT s.name FROM seq_region s, coord_system cs WHERE s.coord_system_id=cs.coord_system_id AND cs.name='chromosome' AND cs.attrib='default_version' AND s.name NOT LIKE 'LRG%' AND s.name != 'MT'"); String[] patches = DBUtils.getColumnValues(con, "SELECT sr.name FROM seq_region sr, assembly_exception ae WHERE sr.seq_region_id=ae.seq_region_id AND ae.exc_type IN ('PATCH_NOVEL', 'PATCH_FIX', 'HAP')"); List<String> patchList = Arrays.asList(patches); List<String> nonPatchSeqRegions = ListUtils.removeAll(Arrays.asList(seqRegionNames), patchList); int count = 0; try { PreparedStatement stmt = con.prepareStatement("SELECT sr.name, MAX(kar.seq_region_end), sr.length FROM seq_region sr, karyotype kar WHERE sr.seq_region_id=kar.seq_region_id AND sr.name = ? GROUP BY kar.seq_region_id"); for (String seqRegion : seqRegionNames) { stmt.setString(1, seqRegion); ResultSet rs = stmt.executeQuery(); boolean hasKaryotype = false; while (rs.next() && count < 50) { hasKaryotype = true; if (patchList.contains(seqRegion)) { continue; } String chrName = rs.getString(1); int karLen = rs.getInt(2); int chrLen = rs.getInt(3); String prob = ""; int bp = 0; if (karLen > chrLen) { bp = karLen - chrLen; prob = "longer"; } else { bp = chrLen - karLen; prob = "shorter"; } if (bp > 0) { result = false; count++; ReportManager.problem(this, con, "Chromosome " + chrName + " is " + bp + "bp " + prob + " in the karyotype table than " + "in the seq_region table"); } } if (!hasKaryotype) { result = false; ReportManager.problem(this, con, "Chromosome " + seqRegion + " has no karyotype data"); } } } catch (SQLException e) { e.printStackTrace(); } if (count == 0) { ReportManager.correct(this, con, "Chromosome lengths are the same" + " in karyotype and seq_region tables"); } return result; } // run protected boolean karyotypeExists(DatabaseRegistryEntry dbre) { Connection con = dbre.getConnection(); SqlTemplate t = DBUtils.getSqlTemplate(dbre); boolean result = true; String sqlCS = "SELECT count(*) FROM coord_system WHERE name = 'chromosome'"; String sqlAttrib = "SELECT count(*) FROM seq_region_attrib sa, attrib_type at WHERE at.attrib_type_id = sa.attrib_type_id AND code = 'karyotype_rank'"; String sqlMT = "SELECT count(*) FROM seq_region_attrib sa, attrib_type at, seq_region s WHERE s.seq_region_id = sa.seq_region_id AND at.attrib_type_id = sa.attrib_type_id AND code = 'karyotype_rank' AND s.name IN ('MT', 'Mito', 'dmel_mitochondrion_genome', 'MtDNA')"; int karyotype = t.queryForDefaultObject(sqlCS, Integer.class); if (karyotype > 0) { int attrib = t.queryForDefaultObject(sqlAttrib, Integer.class); if (attrib < 2) { result = false; ReportManager.problem(this, con, "Chromosome entry exists but no karyotype attrib is present"); } int mt = t.queryForDefaultObject(sqlMT, Integer.class); if (mt == 0 && dbre.getType() != DatabaseType.SANGER_VEGA) { result = false; ReportManager.problem(this, con, "Species has chromosomes but neither MT nor Mito nor dmel_mitochondrion_genome nor MtDNA"); } } return result; } } // Karyotype