/*
 * Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
 * Copyright [2016-2017] EMBL-European Bioinformatics Institute
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Copyright (C) 2003 EBI, GRL
 *
 * This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
package org.ensembl.healthcheck.testcase.generic;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.text.DecimalFormat;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.logging.Logger;

import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Species;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.SingleDatabaseTestCase;
import org.ensembl.healthcheck.util.DBUtils;

/**
 * Compare the projected gene names (gene display xrefs whose info_type is 'PROJECTION') in the current database with those
 * from the equivalent database on the secondary server.
 */
public class ComparePreviousVersionProjectedGeneNames extends SingleDatabaseTestCase {

    /** Counts genes that have a display xref. */
    private static final String DISPLAY_XREF_COUNT_SQL = "SELECT COUNT(1) FROM gene WHERE display_xref_id IS NOT NULL";

    /** Counts all genes. */
    private static final String GENE_COUNT_SQL = "SELECT COUNT(1) FROM gene";

    /** Lists every gene with a projected display xref (run against the previous database). */
    private static final String PREVIOUS_SQL = "SELECT stable_id, db_name, dbprimary_acc FROM gene LEFT JOIN xref ON display_xref_id = xref_id LEFT JOIN external_db USING(external_db_id) WHERE xref.info_type = 'PROJECTION'";

    /** Looks up the projected display xref of a single gene by stable ID (run against the current database). */
    private static final String CURRENT_SQL = "SELECT stable_id, db_name, dbprimary_acc FROM gene LEFT JOIN xref ON display_xref_id = xref_id LEFT JOIN external_db USING(external_db_id) WHERE xref.info_type = 'PROJECTION' AND stable_id = ?";

    /** Overall change (as a percentage of all previous genes) above which the detailed checks are run. */
    private static final float OVERALL_CHANGE_THRESHOLD_PERCENT = 2;

    /** Percentage of previous display xrefs above which an individual kind of change is a problem. */
    private static final float PROBLEM_THRESHOLD_PERCENT = 5;

    /** Absolute number of changes that must also be exceeded before accession/source changes are a problem. */
    private static final int MIN_CHANGES_FOR_PROBLEM = 50;

    /** Maximum number of example stable IDs kept for each source change. */
    private static final int MAX_EXAMPLES = 3;

    /**
     * Create a new testcase.
     */
    public ComparePreviousVersionProjectedGeneNames() {

        setDescription("Compare gene names in the current database with those from the equivalent database on the secondary server.");
        setTeamResponsible(Team.CORE);
        setSecondTeamResponsible(Team.RELEASE_COORDINATOR);

    }

    /**
     * Exclude otherfeatures, estgene, cdna and rnaseq databases from this test.
     */
    public void types() {

        removeAppliesToType(DatabaseType.OTHERFEATURES);
        removeAppliesToType(DatabaseType.ESTGENE);
        removeAppliesToType(DatabaseType.CDNA);
        removeAppliesToType(DatabaseType.RNASEQ);

    }

    // ----------------------------------------------------------------------

    /**
     * Compare every projected gene display xref of the previous database with the one the same gene (matched by stable ID) has
     * in the current database, and report large differences.
     *
     * @param dbre
     *          The database to check.
     * @return true if no equivalent previous database is found or no problem is reported; false if a display xref count is 0, the
     *         comparison queries fail, or a problem threshold is exceeded.
     */
    public boolean run(DatabaseRegistryEntry dbre) {

        Connection currentCon = dbre.getConnection();

        DatabaseRegistryEntry previous = getEquivalentFromSecondaryServer(dbre);
        if (previous == null) {
            // nothing to compare against
            return true;
        }
        Connection previousCon = previous.getConnection();

        int displayXrefCount = DBUtils.getRowCount(currentCon, DISPLAY_XREF_COUNT_SQL);
        int displayXrefPreviousCount = DBUtils.getRowCount(previousCon, DISPLAY_XREF_COUNT_SQL);
        int previousGeneCount = DBUtils.getRowCount(previousCon, GENE_COUNT_SQL);

        if (displayXrefCount == 0 || displayXrefPreviousCount == 0) {
            ReportManager.problem(this, currentCon, "display xref count is 0 in the current or previous database");
            return false;
        }

        int missingIds = 0;
        int accessionsChanged = 0;
        // keyed by "<previous source> to <current source>"
        HashMap<String, Integer> changeCounts = new HashMap<String, Integer>();
        HashMap<String, String> exampleStableIds = new HashMap<String, String>();

        PreparedStatement previousStmt = null;
        PreparedStatement currentStmt = null;
        ResultSet previousRS = null;

        try {

            previousStmt = previousCon.prepareStatement(PREVIOUS_SQL);
            currentStmt = currentCon.prepareStatement(CURRENT_SQL);
            previousRS = previousStmt.executeQuery();

            // Get data from previous database, compare each one with equivalent on current
            while (previousRS.next()) {

                String stableId = previousRS.getString(1);
                String previousDbName = nullToLiteral(previousRS.getString(2));
                String previousAccession = nullToLiteral(previousRS.getString(3));

                currentStmt.setString(1, stableId);
                // executeQuery() never returns null, so only an empty result needs handling
                ResultSet currentRS = currentStmt.executeQuery();
                try {

                    if (!currentRS.next()) {
                        missingIds++;
                        continue;
                    }

                    String currentDbName = nullToLiteral(currentRS.getString(2));
                    String currentAccession = nullToLiteral(currentRS.getString(3));

                    if (currentDbName.equals(previousDbName)) {
                        if (!currentAccession.equals(previousAccession)) {
                            // accession changed - same source
                            accessionsChanged++;
                        }
                    } else {
                        // source changed
                        recordSourceChange(changeCounts, exampleStableIds, previousDbName + " to " + currentDbName, stableId);
                    }

                } finally {
                    currentRS.close();
                }

            }

        } catch (SQLException e) {
            // Partial counts would give misleading percentages, so fail the test instead of evaluating them.
            System.err.println("Error executing SQL");
            e.printStackTrace();
            ReportManager.problem(this, currentCon, "Could not compare projected gene names with the previous database: " + e.getMessage());
            return false;
        } finally {
            closeQuietly(previousRS);
            closeQuietly(currentStmt);
            closeQuietly(previousStmt);
        }

        return reportChanges(currentCon, previousGeneCount, displayXrefPreviousCount, missingIds, accessionsChanged, changeCounts,
                exampleStableIds);

    }

    // ----------------------------------------------------------------------

    /**
     * Evaluate the collected counts against the thresholds and report the results.
     *
     * @return false if any problem was reported, true otherwise.
     */
    private boolean reportChanges(Connection currentCon, int previousGeneCount, int displayXrefPreviousCount, int missingIds,
            int accessionsChanged, HashMap<String, Integer> changeCounts, HashMap<String, String> exampleStableIds) {

        int changedSource = 0;
        for (Integer changeCount : changeCounts.values()) {
            changedSource += changeCount;
        }

        // the overall change is measured against all genes of the previous database
        float percentageChange = (float) (changedSource + accessionsChanged) / previousGeneCount * 100;
        if (!(percentageChange > OVERALL_CHANGE_THRESHOLD_PERCENT)) {
            return true;
        }
        ReportManager.info(this, currentCon, "Overall gene display xrefs have changed by " + percentageChange);

        boolean result = true;

        // the individual checks are measured against the previous display xref count
        float percentage = roundedPercentage(missingIds, displayXrefPreviousCount);
        if (missingIds > 0 && percentage > PROBLEM_THRESHOLD_PERCENT) {
            ReportManager.problem(this, currentCon, missingIds + "(" + percentage + "%) genes lack projected names in the current database ");
            result = false;
        }

        percentage = roundedPercentage(accessionsChanged, displayXrefPreviousCount);
        if (accessionsChanged > MIN_CHANGES_FOR_PROBLEM && percentage > PROBLEM_THRESHOLD_PERCENT) {
            ReportManager.problem(this, currentCon, accessionsChanged + "(" + percentage + "%) display xref primary accessions changed for the same source ");
            result = false;
        }

        // print out counts and percentages of source changes
        for (String key : changeCounts.keySet()) {
            int changeCount = changeCounts.get(key);
            percentage = roundedPercentage(changeCount, displayXrefPreviousCount);
            if (percentage > PROBLEM_THRESHOLD_PERCENT && changeCount > MIN_CHANGES_FOR_PROBLEM) {
                ReportManager.problem(this, currentCon, changeCount + "(" + percentage + "%) gene display xrefs changed source from " + key
                        + exampleStableIds.get(key));
                result = false;
            }
        }

        return result;

    }

    /**
     * Count one more gene whose display xref source changed, keeping the first few stable IDs as examples.
     */
    private static void recordSourceChange(HashMap<String, Integer> changeCounts, HashMap<String, String> exampleStableIds,
            String dbNames, String stableId) {

        Integer seen = changeCounts.get(dbNames);
        if (seen == null) {
            changeCounts.put(dbNames, 1);
            exampleStableIds.put(dbNames, ", e.g. " + stableId);
            return;
        }

        int changeCount = seen + 1;
        changeCounts.put(dbNames, changeCount);
        if (changeCount <= MAX_EXAMPLES) {
            exampleStableIds.put(dbNames, exampleStableIds.get(dbNames) + " " + stableId);
        }

    }

    /**
     * Map a SQL NULL to the string "null" so values can be compared and printed uniformly.
     */
    private static String nullToLiteral(String value) {
        return value == null ? "null" : value;
    }

    /**
     * Express count as a percentage of total, rounded to two decimal places. Rounding is done arithmetically rather than by
     * formatting and re-parsing, which fails in locales that use a decimal comma.
     */
    private static float roundedPercentage(int count, int total) {
        float percentage = (float) count / total * 100;
        return Math.round(percentage * 100f) / 100f;
    }

    /**
     * Close a result set, ignoring a null argument and any error raised by the close itself.
     */
    private static void closeQuietly(ResultSet rs) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException ignored) {
                // best-effort cleanup; a failure here must not mask the real outcome
            }
        }
    }

    /**
     * Close a statement, ignoring a null argument and any error raised by the close itself.
     */
    private static void closeQuietly(PreparedStatement stmt) {
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException ignored) {
                // best-effort cleanup; a failure here must not mask the real outcome
            }
        }
    }

    // ----------------------------------------------------------------------

} // ComparePreviousVersionProjectedGeneNames