/*
* Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
* Copyright [2016-2017] EMBL-European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ensembl.healthcheck.testcase.generic;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.ensembl.healthcheck.DatabaseRegistry;
import org.ensembl.healthcheck.DatabaseRegistryEntry;
import org.ensembl.healthcheck.DatabaseType;
import org.ensembl.healthcheck.ReportManager;
import org.ensembl.healthcheck.Team;
import org.ensembl.healthcheck.testcase.MultiDatabaseTestCase;
import org.ensembl.healthcheck.testcase.Priority;
import org.ensembl.healthcheck.util.ConnectionBasedSqlTemplateImpl;
import org.ensembl.healthcheck.util.MapRowMapper;
import org.ensembl.healthcheck.util.SqlTemplate;
import org.ensembl.healthcheck.util.StringListMapRowMapper;
/**
* Check that all xrefs from a certain source (e.g. HGNC, EntrezGene) are consistently assigned to the same Ensembl object type.
*/
public class XrefLevels extends MultiDatabaseTestCase {
/**
* Creates a new instance of XrefLevels
*/
public XrefLevels() {
setDescription("Check that all xrefs from a certain source (e.g. HGNC, EntrezGene) are consistently assigned to the same Ensembl object type across all species");
setPriority(Priority.AMBER);
setEffect("Causes BioMart to require specific workarounds for each case.");
setFix("Manually fix affected xrefs.");
setTeamResponsible(Team.CORE);
}
/**
* This only applies to core and Vega databases.
*/
public void types() {
removeAppliesToType(DatabaseType.OTHERFEATURES);
removeAppliesToType(DatabaseType.CDNA);
removeAppliesToType(DatabaseType.RNASEQ);
}
/**
* Run the test.
*
* @param dbr
* The database registry containing all the specified databases.
*/
public boolean run(DatabaseRegistry dbr) {
boolean result = true;
// easier to do this in SQL than Java
// Create an in-memory SQL database via h2 driver
try {
Class.forName("org.h2.Driver"); // memory:tablename
Connection tempDB = DriverManager.getConnection("jdbc:h2:mem:XrefLevels");
createTempTable(tempDB);
// master list of species, sources and objects
DatabaseRegistryEntry masterDBRE = null;
PreparedStatement masterPrep = null;
String masterTable = "healthcheck_xref";
DatabaseRegistryEntry[] dbres = dbr.getAll(DatabaseType.CORE);
if (dbres.length == 0) {
return true;
}
for (DatabaseRegistryEntry dbre : dbres) {
if (masterDBRE == null) {
masterDBRE = dbre;
masterPrep = tempDB.prepareStatement("INSERT INTO " + masterTable + " (species, source, object, database) VALUES (?,?,?,?)");
}
// fill with the list of sources and object types from each species
logger.fine("Adding sources and objects for " + dbre.getName());
Statement stmt = dbre.getConnection().createStatement();
// can't do this in one query as the databases being written to and read from might be on separate servers
ResultSet rs = stmt
.executeQuery("SELECT e.db_name, ox.ensembl_object_type FROM external_db e, xref x, object_xref ox WHERE e.external_db_id=x.external_db_id AND x.xref_id=ox.xref_id GROUP BY e.db_name, ox.ensembl_object_type");
while (rs.next()) {
String species = dbre.getSpecies().toString();
if (species == null || species.equalsIgnoreCase("unknown")) {
species = dbre.getAlias();
}
masterPrep.setString(1, species);
masterPrep.setString(2, rs.getString("db_name"));
masterPrep.setString(3, rs.getString("ensembl_object_type"));
masterPrep.setString(4, dbre.getName());
masterPrep.execute();
}
stmt.close();
}
PreparedStatement sourcePrep = tempDB.prepareStatement("select distinct source from "+masterTable);
ResultSet sources = sourcePrep.executeQuery();
while (sources.next()) {
String source = sources.getString("source");
String query = "select object,species from "+ masterTable +" where source = ?";
String queryDb = "select species,database from "+ masterTable +" where source = ?";
MapRowMapper<String,List<String>> mapper = new StringListMapRowMapper();
SqlTemplate template = new ConnectionBasedSqlTemplateImpl(tempDB);
Map<String,List<String>> map = template.queryForMap(query, mapper, source);
Map<String,List<String>> mapDb = template.queryForMap(queryDb, mapper, source);
if (map.size() != 1) {
// more than one list in the map implies there are at least two object types referenced
// figure out which species are different
String message = "Source:"+source+", types differ between species. ";
int smallest = 1000;
String smallestType = "";
for (Map.Entry<String, List<String>> entry: map.entrySet()) {
List<String> species = entry.getValue();
if (species.size() < smallest) {
smallest = species.size();
smallestType = entry.getKey();
}
message = message.concat(entry.getKey() + " has " + species.size() + " species. ");
}
List<String> minoritySpecies = map.get(smallestType);
message = message.concat("Problem species are:"+ StringUtils.join(minoritySpecies,","));
for (String species: minoritySpecies) {
List<String> minorityDatabases = mapDb.get(species);
message = message.concat(". In problem databases:"+ StringUtils.join(minorityDatabases,","));
}
ReportManager.problem(this, "", message);
result = false;
}
}
dropTempTable(tempDB);
tempDB.close();
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block for making h2 database connection.
// Somebody do this properly!
e.printStackTrace();
} catch (SQLException e) {
e.printStackTrace();
}
return result;
} // run
// -----------------------------------------------------------------------
private void createTempTable(Connection conn) {
// making a temporary table in memory rather than affecting production DB
try {
Statement stmt = conn.createStatement();
stmt.execute("CREATE TABLE healthcheck_xref (species VARCHAR(255), source VARCHAR(255), object VARCHAR(255), database VARCHAR(255))");
logger.fine("Created table healthcheck_xref in temporary H2 DB");
} catch (SQLException se) {
se.printStackTrace();
}
}
// -----------------------------------------------------------------------
private void dropTempTable(Connection conn) {
try {
Statement stmt = conn.createStatement();
stmt.execute("DROP TABLE IF EXISTS healthcheck_xref");
logger.fine("Dropped table healthcheck_xref in temporary DB");
} catch (SQLException se) {
se.printStackTrace();
}
}
// -----------------------------------------------------------------------
} // XrefLevels